about | summary | refs | log | tree | commit | diff | stats
path: root/src/libmime
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-09-06 16:24:28 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-09-06 19:50:18 +0100
commit: af5f57916e4345d988802794c84460960ee47d0c (patch)
tree: 66448c2c64a3dee466b932999a41a9ce4bb88991 /src/libmime
parent: 714cc7ffd10a864477488b759c54205c91a99bf8 (diff)
download: rspamd-af5f57916e4345d988802794c84460960ee47d0c.tar.gz
download: rspamd-af5f57916e4345d988802794c84460960ee47d0c.zip
[Minor] Add UText wrapper for stripped content
Diffstat (limited to 'src/libmime')
-rw-r--r-- src/libmime/message.c 15
-rw-r--r-- src/libmime/message.h 2
2 files changed, 16 insertions, 1 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 388ab0aa3..e59d34b25 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -495,11 +495,11 @@ static void
rspamd_normalize_text_part (struct rspamd_task *task,
struct rspamd_mime_text_part *part)
{
-
const gchar *p, *end;
guint i;
goffset off;
struct rspamd_process_exception *ex;
+ UErrorCode uc_err = U_ZERO_ERROR;
part->newlines = g_ptr_array_sized_new (128);
@@ -526,6 +526,18 @@ rspamd_normalize_text_part (struct rspamd_task *task,
}
}
+ if (IS_PART_UTF (part)) {
+ utext_openUTF8 (&part->utf_stripped_text,
+ part->utf_stripped_content->data,
+ part->utf_stripped_content->len,
+ &uc_err);
+
+ if (!U_SUCCESS (uc_err)) {
+ msg_warn_task ("cannot open text from utf content");
+ /* Probably, should be an assertion */
+ }
+ }
+
rspamd_mempool_add_destructor (task->task_pool,
(rspamd_mempool_destruct_t) free_byte_array_callback,
part->utf_stripped_content);
@@ -833,6 +845,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
text_part->raw.len = mime_part->raw_data.len;
text_part->parsed.begin = mime_part->parsed_data.begin;
text_part->parsed.len = mime_part->parsed_data.len;
+ text_part->utf_stripped_text = (UText)UTEXT_INITIALIZER;
if (found_html) {
if (!rspamd_message_process_html_text_part (task, text_part)) {
diff --git a/src/libmime/message.h b/src/libmime/message.h
index e4b5a3d4b..f4dbdaa72 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -14,6 +14,7 @@
#include "content_type.h"
#include <unicode/uchar.h>
+#include <unicode/utext.h>
struct rspamd_task;
struct controller_session;
@@ -97,6 +98,7 @@ struct rspamd_mime_text_part {
GByteArray *utf_stripped_content; /* utf content with no newlines */
GArray *normalized_hashes;
GArray *utf_words;
+ UText utf_stripped_text; /* Used by libicu to represent the utf8 content */
/* Unicode content, used by libicu */
GArray *unicode_raw_content; /* unicode raw content (of UChar) */