From c4105fc43199d51af271bc24d3345aa57906d973 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 12 Jul 2011 18:35:47 +0400 Subject: Cache the result of the parts distance function to speed up multiple rules that use this function. --- src/cfg_file.h | 2 ++ src/expressions.c | 23 ++++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/cfg_file.h b/src/cfg_file.h index 268906d40..7e2dfd413 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -189,6 +189,8 @@ struct statfile { statfile_normalize_func normalizer; /**< function that is used as normaliser */ void *normalizer_data; /**< normalizer function params */ gchar *normalizer_str; /**< source string (for dump) */ + GHashTable *opts; /**< different statfile options */ + gboolean is_spam; /**< spam flag */ }; /** diff --git a/src/expressions.c b/src/expressions.c index fa6ce0fef..d330526de 100644 --- a/src/expressions.c +++ b/src/expressions.c @@ -1016,7 +1016,7 @@ rspamd_parts_distance (struct worker_task * task, GList * args, void *unused) struct expression_argument *arg; GMimeObject *parent; const GMimeContentType *ct; - + gint *pdiff; if (args == NULL) { debug_task ("no threshold is specified, assume it 100"); @@ -1032,10 +1032,23 @@ rspamd_parts_distance (struct worker_task * task, GList * args, void *unused) } } + if ((pdiff = memory_pool_get_variable (task->task_pool, "parts_distance")) != NULL) { + diff = *pdiff; + if (diff != -1 && diff <= threshold) { + return TRUE; + } + else { + return FALSE; + } + } + if (g_list_length (task->text_parts) == 2) { cur = g_list_first (task->text_parts); p1 = cur->data; cur = g_list_next (cur); + pdiff = memory_pool_alloc (task->task_pool, sizeof (gint)); + *pdiff = -1; + if (cur == NULL) { msg_info ("bad parts list"); return FALSE; @@ -1051,30 +1064,38 @@ rspamd_parts_distance (struct worker_task * task, GList * args, void *unused) if (ct == NULL || ! 
g_mime_content_type_is_type ((GMimeContentType *)ct, "multipart", "alternative")) { #endif debug_task ("two parts are not belong to multipart/alternative container, skip check"); + memory_pool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); return FALSE; } } else { debug_task ("message contains two parts but they are in different multi-parts"); + memory_pool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); return FALSE; } if (!p1->is_empty && !p2->is_empty) { diff = fuzzy_compare_parts (p1, p2); debug_task ("got likeliness between parts of %d%%, threshold is %d%%", diff, threshold); + *pdiff = diff; + memory_pool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); if (diff <= threshold) { return TRUE; } } else if ((p1->is_empty && !p2->is_empty) || (!p1->is_empty && p2->is_empty)) { /* Empty and non empty parts are different */ + *pdiff = 0; + memory_pool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); return TRUE; } } else { debug_task ("message has too many text parts, so do not try to compare them with each other"); + memory_pool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); return FALSE; } + memory_pool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); return FALSE; } -- cgit v1.2.3