summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/libmime/message.c 7
-rw-r--r-- src/libmime/message.h 1
-rw-r--r-- src/libserver/task.c 3
-rw-r--r-- src/libstat/learn_cache/sqlite3_cache.c 6
-rw-r--r-- src/libstat/stat_process.c 11
-rw-r--r-- src/plugins/fuzzy_check.c 16
6 files changed, 10 insertions, 34 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 19bef072a..10d7f04f4 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -995,7 +995,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
#endif
/* Ugly workaround */
- tmp = rspamd_tokenize_text (part->content->data,
+ part->normalized_words = rspamd_tokenize_text (part->content->data,
part->content->len, IS_PART_UTF (part), task->cfg,
part->urls_offset, FALSE,
NULL);
@@ -1034,7 +1034,6 @@ rspamd_normalize_text_part (struct rspamd_task *task,
}
}
}
- part->normalized_words = tmp;
}
#ifdef WITH_SNOWBALL
if (stem != NULL) {
@@ -1246,10 +1245,6 @@ process_text_part (struct rspamd_task *task,
/* Post process part */
detect_text_language (text_part);
- text_part->words = rspamd_tokenize_text (text_part->content->data,
- text_part->content->len, IS_PART_UTF (text_part), task->cfg,
- text_part->urls_offset, FALSE,
- &text_part->hash);
rspamd_normalize_text_part (task, text_part);
/* Calculate number of lines */
diff --git a/src/libmime/message.h b/src/libmime/message.h
index aea5c3750..13ccaa4fa 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -45,7 +45,6 @@ struct mime_text_part {
GList *urls_offset; /**< list of offsets of urls */
GMimeObject *parent;
struct mime_part *mime_part;
- GArray *words;
GArray *normalized_words;
guint nlines;
guint64 hash;
diff --git a/src/libserver/task.c b/src/libserver/task.c
index 7d34e830b..eea9057ee 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -185,9 +185,6 @@ rspamd_task_free (struct rspamd_task *task)
for (i = 0; i < task->text_parts->len; i ++) {
tp = g_ptr_array_index (task->text_parts, i);
- if (tp->words) {
- g_array_free (tp->words, TRUE);
- }
if (tp->normalized_words) {
g_array_free (tp->normalized_words, TRUE);
}
diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c
index 889395b4d..cf4ab615a 100644
--- a/src/libstat/learn_cache/sqlite3_cache.c
+++ b/src/libstat/learn_cache/sqlite3_cache.c
@@ -257,9 +257,9 @@ rspamd_stat_cache_sqlite3_process (struct rspamd_task *task,
for (i = 0; i < task->text_parts->len; i ++) {
part = g_ptr_array_index (task->text_parts, i);
- if (part->words != NULL) {
- for (j = 0; j < part->words->len; j ++) {
- word = &g_array_index (part->words, rspamd_ftok_t, j);
+ if (part->normalized_words != NULL) {
+ for (j = 0; j < part->normalized_words->len; j ++) {
+ word = &g_array_index (part->normalized_words, rspamd_ftok_t, j);
rspamd_cryptobox_hash_update (&st, word->begin, word->len);
}
}
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 952330b49..c0aad1930 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -198,17 +198,12 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
for (i = 0; i < task->text_parts->len; i ++) {
part = g_ptr_array_index (task->text_parts, i);
- if (!IS_PART_EMPTY (part) && part->words != NULL) {
- if (compat) {
- tok->tokenizer->tokenize_func (tok, task->task_pool,
- part->words, IS_PART_UTF (part), NULL);
- }
- else {
- tok->tokenizer->tokenize_func (tok, task->task_pool,
+ if (!IS_PART_EMPTY (part) && part->normalized_words != NULL) {
+ tok->tokenizer->tokenize_func (tok, task->task_pool,
part->normalized_words, IS_PART_UTF (part), NULL);
- }
}
+
if (pdiff != NULL && *pdiff > similarity_treshold) {
msg_debug_task ("message has two common parts (%d%%), so skip the last one",
*pdiff);
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index cf58eb672..e726419db 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -592,17 +592,7 @@ fuzzy_io_fin (void *ud)
static GArray *
fuzzy_preprocess_words (struct mime_text_part *part, rspamd_mempool_t *pool)
{
- GArray *res;
-
- if (!IS_PART_UTF (part) || !part->language || part->language[0] == '\0' ||
- part->normalized_words == NULL) {
- res = part->words;
- }
- else {
- res = part->normalized_words;
- }
-
- return res;
+ return part->normalized_words;
}
/*
@@ -1259,14 +1249,14 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
continue;
}
- if (part->words == NULL || part->words->len == 0) {
+ if (part->normalized_words == NULL || part->normalized_words->len == 0) {
msg_info_task ("<%s>, part hash empty, skip fuzzy check",
task->message_id);
continue;
}
if (fuzzy_module_ctx->min_hash_len != 0 &&
- part->words->len < fuzzy_module_ctx->min_hash_len) {
+ part->normalized_words->len < fuzzy_module_ctx->min_hash_len) {
msg_info_task (
"<%s>, part hash is shorter than %d symbols, skip fuzzy check",
task->message_id,