aboutsummaryrefslogtreecommitdiffstats
path: root/src/libmime/message.h
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-09-05 17:43:20 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-09-05 17:44:35 +0100
commit: a64ce9b4245153e68fbbcd9c6610b9c1ccf76493 (patch)
tree: b9b9798b77974cf8d0793c948966a95963266771 /src/libmime/message.h
parent: 3807688a67be66d00a24172c13b00b6fb1816d69 (diff)
download: rspamd-a64ce9b4245153e68fbbcd9c6610b9c1ccf76493.tar.gz
download: rspamd-a64ce9b4245153e68fbbcd9c6610b9c1ccf76493.zip
[Rework] Rework utf content processing in text parts
- Store unicode in UTF parts
- Store unicode for HTML parts
- Rename struct fields and split them into unicode/utf components
Diffstat (limited to 'src/libmime/message.h')
-rw-r--r-- src/libmime/message.h | 21
1 file changed, 14 insertions, 7 deletions
diff --git a/src/libmime/message.h b/src/libmime/message.h
index baabb762a..e4b5a3d4b 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -86,20 +86,28 @@ struct rspamd_mime_text_part {
const gchar *language;
GPtrArray *languages;
const gchar *real_charset;
+
+ /* Raw data in native encoding */
rspamd_ftok_t raw;
rspamd_ftok_t parsed; /* decoded from mime encodings */
- GByteArray *content; /* utf8 encoded processed content */
- GArray *ucs_raw_content; /* unicode raw content (of UChar) */
+ /* UTF8 content */
+ GByteArray *utf_content; /* utf8 encoded processed content */
GByteArray *utf_raw_content; /* utf raw content */
- GByteArray *stripped_content; /* utf content with no newlines */
+ GByteArray *utf_stripped_content; /* utf content with no newlines */
+ GArray *normalized_hashes;
+ GArray *utf_words;
+
+ /* Unicode content, used by libicu */
+ GArray *unicode_raw_content; /* unicode raw content (of UChar) */
+ GArray *unicode_content; /* unicode processed content (of UChar) */
+ GArray *unicode_words;
+
GPtrArray *newlines; /**< positions of newlines in text, relative to content*/
struct html_content *html;
GList *exceptions; /**< list of offsets of urls */
struct rspamd_mime_part *mime_part;
- GArray *normalized_words;
- GArray *ucs32_words;
- GArray *normalized_hashes;
+
guint flags;
guint nlines;
guint spaces;
@@ -110,7 +118,6 @@ struct rspamd_mime_text_part {
guint empty_lines;
guint capital_letters;
guint numeric_characters;
- guint ucs_len;
};
enum rspamd_received_type {