aboutsummaryrefslogtreecommitdiffstats
path: root/src/libmime
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-04-01 14:54:57 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-04-01 14:54:57 +0100
commit 5bc3b26c98812d5a1bc1c4753ad656b403bf1e3a (patch)
tree b3a29ce4393757cda92256639f038bd8028e4116 /src/libmime
parent d3764043ea8040e5875828a0c1b319298fea29cf (diff)
download rspamd-5bc3b26c98812d5a1bc1c4753ad656b403bf1e3a.tar.gz
rspamd-5bc3b26c98812d5a1bc1c4753ad656b403bf1e3a.zip
Add new UTF8 tokenizer.
Diffstat (limited to 'src/libmime')
-rw-r--r-- src/libmime/message.c 9
1 file changed, 6 insertions, 3 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index ebf12b413..8f7a9d5c8 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1190,8 +1190,11 @@ rspamd_normalize_text_part (struct rspamd_task *task,
}
}
- part->normalized_words = g_array_sized_new (FALSE, FALSE,
- sizeof (rspamd_fstring_t), part->words->len);
+ /* Ugly workaround */
+ part->normalized_words = rspamd_tokenize_text (part->content->data,
+ part->content->len, part->is_utf, task->cfg->min_word_len,
+ part->urls_offset, FALSE);
+
for (i = 0; i < part->words->len; i ++) {
w = &g_array_index (part->words, rspamd_fstring_t, i);
if (stem) {
@@ -1324,7 +1327,7 @@ process_text_part (struct rspamd_task *task,
detect_text_language (text_part);
text_part->words = rspamd_tokenize_text (text_part->content->data,
text_part->content->len, text_part->is_utf, task->cfg->min_word_len,
- &text_part->urls_offset);
+ text_part->urls_offset, TRUE);
rspamd_normalize_text_part (task, text_part);
}