diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-11-26 17:04:55 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-11-26 17:04:55 +0000 |
commit | 22259fdbd6e2a0a99b6c29df2dd4f1e2eedfe66f (patch) | |
tree | 117811cdcf409bb81013729a67dc1cfa672324c9 /src/libstat | |
parent | b840e3afa41c0e7a53de05e989bc647b08fe842d (diff) | |
download | rspamd-22259fdbd6e2a0a99b6c29df2dd4f1e2eedfe66f.tar.gz rspamd-22259fdbd6e2a0a99b6c29df2dd4f1e2eedfe66f.zip |
Remove legacy words, use merely normalized_words
Diffstat (limited to 'src/libstat')
-rw-r--r-- | src/libstat/learn_cache/sqlite3_cache.c | 6 | ||||
-rw-r--r-- | src/libstat/stat_process.c | 11 |
2 files changed, 6 insertions, 11 deletions
```diff
diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c
index 889395b4d..cf4ab615a 100644
--- a/src/libstat/learn_cache/sqlite3_cache.c
+++ b/src/libstat/learn_cache/sqlite3_cache.c
@@ -257,9 +257,9 @@ rspamd_stat_cache_sqlite3_process (struct rspamd_task *task,
 	for (i = 0; i < task->text_parts->len; i ++) {
 		part = g_ptr_array_index (task->text_parts, i);
 
-		if (part->words != NULL) {
-			for (j = 0; j < part->words->len; j ++) {
-				word = &g_array_index (part->words, rspamd_ftok_t, j);
+		if (part->normalized_words != NULL) {
+			for (j = 0; j < part->normalized_words->len; j ++) {
+				word = &g_array_index (part->normalized_words, rspamd_ftok_t, j);
 				rspamd_cryptobox_hash_update (&st, word->begin, word->len);
 			}
 		}
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 952330b49..c0aad1930 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -198,17 +198,12 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
 	for (i = 0; i < task->text_parts->len; i ++) {
 		part = g_ptr_array_index (task->text_parts, i);
 
-		if (!IS_PART_EMPTY (part) && part->words != NULL) {
-			if (compat) {
-				tok->tokenizer->tokenize_func (tok, task->task_pool,
-					part->words, IS_PART_UTF (part), NULL);
-			}
-			else {
-				tok->tokenizer->tokenize_func (tok, task->task_pool,
+		if (!IS_PART_EMPTY (part) && part->normalized_words != NULL) {
+			tok->tokenizer->tokenize_func (tok, task->task_pool,
 					part->normalized_words, IS_PART_UTF (part), NULL);
-			}
 		}
 
+
 		if (pdiff != NULL && *pdiff > similarity_treshold) {
 			msg_debug_task ("message has two common parts (%d%%), so skip the last one",
 					*pdiff);
```