diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-10-18 08:18:25 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-10-18 08:18:25 +0100 |
commit | f53e901f3469cab9e2ec6f5983e66e25c87f5731 (patch) | |
tree | bd58a4d313a37a9f51cedec7b2805e0353edb2bc /src/libmime/message.c | |
parent | 1336182634fe880411c081b3002272575c239435 (diff) | |
download | rspamd-f53e901f3469cab9e2ec6f5983e66e25c87f5731.tar.gz rspamd-f53e901f3469cab9e2ec6f5983e66e25c87f5731.zip |
[CritFix] Another portion of tokenization fixes
MFH: rspamd-1.6
Diffstat (limited to 'src/libmime/message.c')
-rw-r--r-- | src/libmime/message.c | 18 |
1 file changed, 14 insertions, 4 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c index f426c821d..cae61643c 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -232,10 +232,20 @@ rspamd_extract_words (struct rspamd_task *task, } #endif /* Ugly workaround */ - part->normalized_words = rspamd_tokenize_text (part->content->data, - part->content->len, IS_PART_UTF (part), task->cfg, - part->exceptions, FALSE, - NULL); + if (IS_PART_HTML (part)) { + part->normalized_words = rspamd_tokenize_text ( + part->content->data, + part->content->len, IS_PART_UTF (part), task->cfg, + part->exceptions, FALSE, + NULL); + } + else { + part->normalized_words = rspamd_tokenize_text ( + part->stripped_content->data, + part->stripped_content->len, IS_PART_UTF (part), task->cfg, + part->exceptions, FALSE, + NULL); + } if (part->normalized_words) { part->normalized_hashes = g_array_sized_new (FALSE, FALSE, |