diff options
-rw-r--r-- | src/libmime/message.c | 7 | ||||
-rw-r--r-- | src/libmime/message.h | 1 | ||||
-rw-r--r-- | src/libserver/task.c | 3 | ||||
-rw-r--r-- | src/libstat/learn_cache/sqlite3_cache.c | 6 | ||||
-rw-r--r-- | src/libstat/stat_process.c | 11 | ||||
-rw-r--r-- | src/plugins/fuzzy_check.c | 16 |
6 files changed, 10 insertions, 34 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c index 19bef072a..10d7f04f4 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -995,7 +995,7 @@ rspamd_normalize_text_part (struct rspamd_task *task, #endif /* Ugly workaround */ - tmp = rspamd_tokenize_text (part->content->data, + part->normalized_words = rspamd_tokenize_text (part->content->data, part->content->len, IS_PART_UTF (part), task->cfg, part->urls_offset, FALSE, NULL); @@ -1034,7 +1034,6 @@ rspamd_normalize_text_part (struct rspamd_task *task, } } } - part->normalized_words = tmp; } #ifdef WITH_SNOWBALL if (stem != NULL) { @@ -1246,10 +1245,6 @@ process_text_part (struct rspamd_task *task, /* Post process part */ detect_text_language (text_part); - text_part->words = rspamd_tokenize_text (text_part->content->data, - text_part->content->len, IS_PART_UTF (text_part), task->cfg, - text_part->urls_offset, FALSE, - &text_part->hash); rspamd_normalize_text_part (task, text_part); /* Calculate number of lines */ diff --git a/src/libmime/message.h b/src/libmime/message.h index aea5c3750..13ccaa4fa 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -45,7 +45,6 @@ struct mime_text_part { GList *urls_offset; /**< list of offsets of urls */ GMimeObject *parent; struct mime_part *mime_part; - GArray *words; GArray *normalized_words; guint nlines; guint64 hash; diff --git a/src/libserver/task.c b/src/libserver/task.c index 7d34e830b..eea9057ee 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -185,9 +185,6 @@ rspamd_task_free (struct rspamd_task *task) for (i = 0; i < task->text_parts->len; i ++) { tp = g_ptr_array_index (task->text_parts, i); - if (tp->words) { - g_array_free (tp->words, TRUE); - } if (tp->normalized_words) { g_array_free (tp->normalized_words, TRUE); } diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c index 889395b4d..cf4ab615a 100644 --- a/src/libstat/learn_cache/sqlite3_cache.c +++ b/src/libstat/learn_cache/sqlite3_cache.c @@ -257,9 +257,9 @@ rspamd_stat_cache_sqlite3_process (struct rspamd_task *task, for (i = 0; i < task->text_parts->len; i ++) { part = g_ptr_array_index (task->text_parts, i); - if (part->words != NULL) { - for (j = 0; j < part->words->len; j ++) { - word = &g_array_index (part->words, rspamd_ftok_t, j); + if (part->normalized_words != NULL) { + for (j = 0; j < part->normalized_words->len; j ++) { + word = &g_array_index (part->normalized_words, rspamd_ftok_t, j); rspamd_cryptobox_hash_update (&st, word->begin, word->len); } } diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 952330b49..c0aad1930 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -198,17 +198,12 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, for (i = 0; i < task->text_parts->len; i ++) { part = g_ptr_array_index (task->text_parts, i); - if (!IS_PART_EMPTY (part) && part->words != NULL) { - if (compat) { - tok->tokenizer->tokenize_func (tok, task->task_pool, - part->words, IS_PART_UTF (part), NULL); - } - else { - tok->tokenizer->tokenize_func (tok, task->task_pool, + if (!IS_PART_EMPTY (part) && part->normalized_words != NULL) { + tok->tokenizer->tokenize_func (tok, task->task_pool, part->normalized_words, IS_PART_UTF (part), NULL); - } } + if (pdiff != NULL && *pdiff > similarity_treshold) { msg_debug_task ("message has two common parts (%d%%), so skip the last one", *pdiff); diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index cf58eb672..e726419db 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -592,17 +592,7 @@ fuzzy_io_fin (void *ud) static GArray * fuzzy_preprocess_words (struct mime_text_part *part, rspamd_mempool_t *pool) { - GArray *res; - - if (!IS_PART_UTF (part) || !part->language || part->language[0] == '\0' || - part->normalized_words == NULL) { - res = part->words; - } - else { - res = part->normalized_words; - } - - return res; + return part->normalized_words; } /* @@ -1259,14 +1249,14 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, continue; } - if (part->words == NULL || part->words->len == 0) { + if (part->normalized_words == NULL || part->normalized_words->len == 0) { msg_info_task ("<%s>, part hash empty, skip fuzzy check", task->message_id); continue; } if (fuzzy_module_ctx->min_hash_len != 0 && - part->words->len < fuzzy_module_ctx->min_hash_len) { + part->normalized_words->len < fuzzy_module_ctx->min_hash_len) { msg_info_task ( "<%s>, part hash is shorter than %d symbols, skip fuzzy check", task->message_id, |