diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-04-01 14:54:57 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-04-01 14:54:57 +0100 |
commit | 5bc3b26c98812d5a1bc1c4753ad656b403bf1e3a (patch) | |
tree | b3a29ce4393757cda92256639f038bd8028e4116 /src/libmime | |
parent | d3764043ea8040e5875828a0c1b319298fea29cf (diff) | |
download | rspamd-5bc3b26c98812d5a1bc1c4753ad656b403bf1e3a.tar.gz rspamd-5bc3b26c98812d5a1bc1c4753ad656b403bf1e3a.zip |
Add new UTF8 tokenizer.
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/message.c | 9 |
1 file changed, 6 insertions, 3 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index ebf12b413..8f7a9d5c8 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1190,8 +1190,11 @@ rspamd_normalize_text_part (struct rspamd_task *task,
 		}
 	}
 
-	part->normalized_words = g_array_sized_new (FALSE, FALSE,
-			sizeof (rspamd_fstring_t), part->words->len);
+	/* Ugly workaround */
+	part->normalized_words = rspamd_tokenize_text (part->content->data,
+			part->content->len, part->is_utf, task->cfg->min_word_len,
+			part->urls_offset, FALSE);
+
 	for (i = 0; i < part->words->len; i ++) {
 		w = &g_array_index (part->words, rspamd_fstring_t, i);
 		if (stem) {
@@ -1324,7 +1327,7 @@ process_text_part (struct rspamd_task *task,
 		detect_text_language (text_part);
 		text_part->words = rspamd_tokenize_text (text_part->content->data,
 				text_part->content->len, text_part->is_utf, task->cfg->min_word_len,
-				&text_part->urls_offset);
+				text_part->urls_offset, TRUE);
 		rspamd_normalize_text_part (task, text_part);
 	}
 