From 22259fdbd6e2a0a99b6c29df2dd4f1e2eedfe66f Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 26 Nov 2015 17:04:55 +0000 Subject: Remove legacy words, use merely normalized_words --- src/libstat/learn_cache/sqlite3_cache.c | 6 +++--- src/libstat/stat_process.c | 11 +++-------- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'src/libstat') diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c index 889395b4d..cf4ab615a 100644 --- a/src/libstat/learn_cache/sqlite3_cache.c +++ b/src/libstat/learn_cache/sqlite3_cache.c @@ -257,9 +257,9 @@ rspamd_stat_cache_sqlite3_process (struct rspamd_task *task, for (i = 0; i < task->text_parts->len; i ++) { part = g_ptr_array_index (task->text_parts, i); - if (part->words != NULL) { - for (j = 0; j < part->words->len; j ++) { - word = &g_array_index (part->words, rspamd_ftok_t, j); + if (part->normalized_words != NULL) { + for (j = 0; j < part->normalized_words->len; j ++) { + word = &g_array_index (part->normalized_words, rspamd_ftok_t, j); rspamd_cryptobox_hash_update (&st, word->begin, word->len); } } diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 952330b49..c0aad1930 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -198,17 +198,12 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, for (i = 0; i < task->text_parts->len; i ++) { part = g_ptr_array_index (task->text_parts, i); - if (!IS_PART_EMPTY (part) && part->words != NULL) { - if (compat) { - tok->tokenizer->tokenize_func (tok, task->task_pool, - part->words, IS_PART_UTF (part), NULL); - } - else { - tok->tokenizer->tokenize_func (tok, task->task_pool, + if (!IS_PART_EMPTY (part) && part->normalized_words != NULL) { + tok->tokenizer->tokenize_func (tok, task->task_pool, part->normalized_words, IS_PART_UTF (part), NULL); - } } + if (pdiff != NULL && *pdiff > similarity_treshold) { msg_debug_task ("message has two common parts (%d%%), so skip the last one", *pdiff); -- cgit v1.2.3