[CritFix] Another portion of tokenization fixes

MFH: rspamd-1.6
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-10-18 08:18:25 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-10-18 08:18:25 +0100
commit: f53e901f3469cab9e2ec6f5983e66e25c87f5731 (patch)
tree: bd58a4d313a37a9f51cedec7b2805e0353edb2bc /src/libmime/message.c
parent: 1336182634fe880411c081b3002272575c239435 (diff)
download: rspamd-f53e901f3469cab9e2ec6f5983e66e25c87f5731.tar.gz rspamd-f53e901f3469cab9e2ec6f5983e66e25c87f5731.zip
1 file changed, 14 insertions, 4 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index f426c821d..cae61643c 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -232,10 +232,20 @@ rspamd_extract_words (struct rspamd_task *task,
 	}
 #endif
 	/* Ugly workaround */
-	part->normalized_words = rspamd_tokenize_text (part->content->data,
-			part->content->len, IS_PART_UTF (part), task->cfg,
-			part->exceptions, FALSE,
-			NULL);
+	if (IS_PART_HTML (part)) {
+		part->normalized_words = rspamd_tokenize_text (
+				part->content->data,
+				part->content->len, IS_PART_UTF (part), task->cfg,
+				part->exceptions, FALSE,
+				NULL);
+	}
+	else {
+		part->normalized_words = rspamd_tokenize_text (
+				part->stripped_content->data,
+				part->stripped_content->len, IS_PART_UTF (part), task->cfg,
+				part->exceptions, FALSE,
+				NULL);
+	}
 
 	if (part->normalized_words) {
 		part->normalized_hashes = g_array_sized_new (FALSE, FALSE,
author	Vsevolod Stakhov <vsevolod@highsecure.ru>	2017-10-18 08:18:25 +0100
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>	2017-10-18 08:18:25 +0100
commit	f53e901f3469cab9e2ec6f5983e66e25c87f5731 (patch)
tree	bd58a4d313a37a9f51cedec7b2805e0353edb2bc /src/libmime/message.c
parent	1336182634fe880411c081b3002272575c239435 (diff)
download	rspamd-f53e901f3469cab9e2ec6f5983e66e25c87f5731.tar.gz rspamd-f53e901f3469cab9e2ec6f5983e66e25c87f5731.zip