diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-09-05 17:43:20 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-09-05 17:44:35 +0100 |
commit | a64ce9b4245153e68fbbcd9c6610b9c1ccf76493 (patch) | |
tree | b9b9798b77974cf8d0793c948966a95963266771 /src/libmime/message.h | |
parent | 3807688a67be66d00a24172c13b00b6fb1816d69 (diff) | |
download | rspamd-a64ce9b4245153e68fbbcd9c6610b9c1ccf76493.tar.gz rspamd-a64ce9b4245153e68fbbcd9c6610b9c1ccf76493.zip |
[Rework] Rework utf content processing in text parts
- Store unicode in UTF parts
- Store unicode for HTML parts
- Rename struct fields and split them into unicode/utf components
Diffstat (limited to 'src/libmime/message.h')
-rw-r--r-- | src/libmime/message.h | 21 |
1 file changed, 14 insertions, 7 deletions
diff --git a/src/libmime/message.h b/src/libmime/message.h index baabb762a..e4b5a3d4b 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -86,20 +86,28 @@ struct rspamd_mime_text_part { const gchar *language; GPtrArray *languages; const gchar *real_charset; + + /* Raw data in native encoding */ rspamd_ftok_t raw; rspamd_ftok_t parsed; /* decoded from mime encodings */ - GByteArray *content; /* utf8 encoded processed content */ - GArray *ucs_raw_content; /* unicode raw content (of UChar) */ + /* UTF8 content */ + GByteArray *utf_content; /* utf8 encoded processed content */ GByteArray *utf_raw_content; /* utf raw content */ - GByteArray *stripped_content; /* utf content with no newlines */ + GByteArray *utf_stripped_content; /* utf content with no newlines */ + GArray *normalized_hashes; + GArray *utf_words; + + /* Unicode content, used by libicu */ + GArray *unicode_raw_content; /* unicode raw content (of UChar) */ + GArray *unicode_content; /* unicode processed content (of UChar) */ + GArray *unicode_words; + GPtrArray *newlines; /**< positions of newlines in text, relative to content*/ struct html_content *html; GList *exceptions; /**< list of offsets of urls */ struct rspamd_mime_part *mime_part; - GArray *normalized_words; - GArray *ucs32_words; - GArray *normalized_hashes; + guint flags; guint nlines; guint spaces; @@ -110,7 +118,6 @@ struct rspamd_mime_text_part { guint empty_lines; guint capital_letters; guint numeric_characters; - guint ucs_len; }; enum rspamd_received_type { |