summary | refs | log | tree | commit | diff | stats
path: root/src/libstat
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-11-26 17:04:55 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-11-26 17:04:55 +0000
commit: 22259fdbd6e2a0a99b6c29df2dd4f1e2eedfe66f (patch)
tree: 117811cdcf409bb81013729a67dc1cfa672324c9 /src/libstat
parent: b840e3afa41c0e7a53de05e989bc647b08fe842d (diff)
download: rspamd-22259fdbd6e2a0a99b6c29df2dd4f1e2eedfe66f.tar.gz
download: rspamd-22259fdbd6e2a0a99b6c29df2dd4f1e2eedfe66f.zip
Remove legacy words, use merely normalized_words
Diffstat (limited to 'src/libstat')
-rw-r--r-- src/libstat/learn_cache/sqlite3_cache.c | 6
-rw-r--r-- src/libstat/stat_process.c | 11
2 files changed, 6 insertions, 11 deletions
diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c
index 889395b4d..cf4ab615a 100644
--- a/src/libstat/learn_cache/sqlite3_cache.c
+++ b/src/libstat/learn_cache/sqlite3_cache.c
@@ -257,9 +257,9 @@ rspamd_stat_cache_sqlite3_process (struct rspamd_task *task,
for (i = 0; i < task->text_parts->len; i ++) {
part = g_ptr_array_index (task->text_parts, i);
- if (part->words != NULL) {
- for (j = 0; j < part->words->len; j ++) {
- word = &g_array_index (part->words, rspamd_ftok_t, j);
+ if (part->normalized_words != NULL) {
+ for (j = 0; j < part->normalized_words->len; j ++) {
+ word = &g_array_index (part->normalized_words, rspamd_ftok_t, j);
rspamd_cryptobox_hash_update (&st, word->begin, word->len);
}
}
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 952330b49..c0aad1930 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -198,17 +198,12 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
for (i = 0; i < task->text_parts->len; i ++) {
part = g_ptr_array_index (task->text_parts, i);
- if (!IS_PART_EMPTY (part) && part->words != NULL) {
- if (compat) {
- tok->tokenizer->tokenize_func (tok, task->task_pool,
- part->words, IS_PART_UTF (part), NULL);
- }
- else {
- tok->tokenizer->tokenize_func (tok, task->task_pool,
+ if (!IS_PART_EMPTY (part) && part->normalized_words != NULL) {
+ tok->tokenizer->tokenize_func (tok, task->task_pool,
part->normalized_words, IS_PART_UTF (part), NULL);
- }
}
+
if (pdiff != NULL && *pdiff > similarity_treshold) {
msg_debug_task ("message has two common parts (%d%%), so skip the last one",
*pdiff);