From 42f3286ae779ab91dd81450cb6cfb8df4d866140 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Sun, 7 Jun 2015 16:13:09 +0100
Subject: [PATCH] Rework filters processing.

---
Review notes (not part of the commit message):
 - process_symbols: index items_by_order and the processed bitmap with the
   loop counter; `idx' was no longer assigned and stayed at -1.
 - metric_limit: honour RSPAMD_TASK_FLAG_PASS_ALL again and do not assert
   when no metric has a positive reject score (both cases were handled by
   the code removed from task.c).
 - process_symbols: log the reject limit, as the removed code did, rather
   than the current score.
 - Line breaks and indentation were restored by hand; the diffstat and hunk
   headers are updated for the added lines, blob ids in `index' are not.

 src/libserver/symbols_cache.c | 198 +++++++++++++++++++++++++---------
 src/libserver/symbols_cache.h |   5 +-
 src/libserver/task.c          |  90 +----------------
 3 files changed, 153 insertions(+), 140 deletions(-)

diff --git a/src/libserver/symbols_cache.c b/src/libserver/symbols_cache.c
index 32ed6f786..e5a140e4e 100644
--- a/src/libserver/symbols_cache.c
+++ b/src/libserver/symbols_cache.c
@@ -112,7 +112,12 @@ cache_logic_cmp (const void *p1, const void *p2, gpointer ud)
 	double weight1, weight2;
 	double f1 = 0, f2 = 0, t1, t2;
 
-	if (i1->priority == i2->priority) {
+	if (i1->deps->len != 0 || i2->deps->len != 0) {
+		/* TODO: handle complex dependencies */
+		w1 = -(i1->deps->len);
+		w2 = -(i2->deps->len);
+	}
+	else if (i1->priority == i2->priority) {
 		f1 = (double)i1->frequency / (double)cache->total_freq;
 		f2 = (double)i2->frequency / (double)cache->total_freq;
 		weight1 = abs (i1->weight) / cache->max_weight;
@@ -770,12 +775,105 @@ struct cache_savepoint {
 	guint processed_num;
 	guint pass;
 	gint offset;
+	struct metric_result *rs;	/* metric result compared against lim */
+	gdouble lim;			/* reject limit, 0 = not found yet */
+	GPtrArray *waitq;		/* created on first call, unused so far */
 };
 
+
+/*
+ * Look for a per-task reject score override for the metric in task->settings
+ * (<metric name> -> "actions" -> rspamd_action_to_str (METRIC_ACTION_REJECT)).
+ * Return TRUE and store the value in *score if a numeric override is found.
+ */
+static gboolean
+check_metric_settings (struct rspamd_task *task, struct metric *metric,
+	double *score)
+{
+	const ucl_object_t *mobj, *reject, *act;
+	double val;
+
+	if (task->settings == NULL) {
+		return FALSE;
+	}
+
+	mobj = ucl_object_find_key (task->settings, metric->name);
+	if (mobj != NULL) {
+		act = ucl_object_find_key (mobj, "actions");
+		if (act != NULL) {
+			reject = ucl_object_find_key (act,
+				rspamd_action_to_str (METRIC_ACTION_REJECT));
+			if (reject != NULL && ucl_object_todouble_safe (reject, &val)) {
+				*score = val;
+				return TRUE;
+			}
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Return TRUE if the metric with the highest reject score has already scored
+ * more than that limit. The limit and its metric result are remembered in the
+ * savepoint once a positive limit is found. Always FALSE for PASS_ALL tasks.
+ */
+static gboolean
+rspamd_symbols_cache_metric_limit (struct rspamd_task *task,
+		struct cache_savepoint *cp)
+{
+	struct metric_result *res;
+	GList *cur;
+	struct metric *metric;
+	double ms;
+
+	if (task->flags & RSPAMD_TASK_FLAG_PASS_ALL) {
+		/* The caller asked for all checks, never stop early */
+		return FALSE;
+	}
+
+	cur = task->cfg->metrics_list;
+
+	if (cp->lim == 0.0) {
+		/*
+		 * Look for metric that has the maximum reject score
+		 */
+		while (cur) {
+			metric = cur->data;
+			res = g_hash_table_lookup (task->results, metric->name);
+
+			if (res) {
+				if (!check_metric_settings (task, metric, &ms)) {
+					ms = metric->actions[METRIC_ACTION_REJECT].score;
+				}
+
+				if (cp->lim < ms) {
+					cp->rs = res;
+					cp->lim = ms;
+				}
+			}
+
+			cur = g_list_next (cur);
+		}
+	}
+
+	/* rs stays NULL when no metric has a positive reject score */
+	if (cp->rs != NULL && cp->rs->score > cp->lim) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Call every symbol not yet marked as processed in task->checkpoint, in the
+ * order of cache->items_by_order. The first call creates the checkpoint and
+ * the default metric result and skips tasks whose settings have "whitelist".
+ * Planning stops once the reject limit is exceeded. Always returns TRUE.
+ */
 gboolean
-rspamd_symbols_cache_process_symbol (struct rspamd_task * task,
-	struct symbols_cache * cache,
-	gpointer *save)
+rspamd_symbols_cache_process_symbols (struct rspamd_task * task,
+	struct symbols_cache * cache)
 {
 	double t1, t2;
 	guint64 diff;
@@ -784,70 +882,74 @@ rspamd_symbols_cache_process_symbol (struct rspamd_task * task,
-	gint idx = -1, i;
+	gint i;
 
 	g_assert (cache != NULL);
-	g_assert (save != NULL);
 
-	if (*save == NULL) {
-		checkpoint = rspamd_mempool_alloc (task->task_pool, sizeof (*checkpoint));
+	if (task->checkpoint == NULL) {
+		checkpoint = rspamd_mempool_alloc0 (task->task_pool, sizeof (*checkpoint));
 		checkpoint->processed_bits = rspamd_mempool_alloc (task->task_pool,
 				NBYTES (cache->used_items));
 		/* Inverse to use ffs */
-		memset (checkpoint->processed_bits, 0xff, NBYTES (cache->used_items));
-		checkpoint->processed_num = 0;
-		checkpoint->pass = 0;
-		checkpoint->offset = 0;
-		*save = checkpoint;
+		memset (checkpoint->processed_bits, 0x0, NBYTES (cache->used_items));
+		checkpoint->waitq = g_ptr_array_new ();
+		rspamd_mempool_add_destructor (task->task_pool,
+				rspamd_ptr_array_free_hard, checkpoint->waitq);
+		task->checkpoint = checkpoint;
+
+		rspamd_create_metric_result (task, DEFAULT_METRIC);
+		if (task->settings) {
+			const ucl_object_t *wl;
+
+			wl = ucl_object_find_key (task->settings, "whitelist");
+			if (wl != NULL) {
+				msg_info ("<%s> is whitelisted", task->message_id);
+				task->flags |= RSPAMD_TASK_FLAG_SKIP;
+				return TRUE;
+			}
+		}
 	}
 	else {
-		checkpoint = *save;
+		checkpoint = task->checkpoint;
 	}
 
 	if (checkpoint->processed_num >= cache->used_items) {
 		/* All symbols are processed */
-		return FALSE;
+		return TRUE;
 	}
 
-	/* TODO: too slow approach */
 	for (i = checkpoint->offset * NBBY; i < (gint)cache->used_items; i ++) {
-		if (isset (checkpoint->processed_bits, i)) {
-			idx = i;
-			break;
+		if (rspamd_symbols_cache_metric_limit (task, checkpoint)) {
+			msg_info ("<%s> has already scored more than %.2f, so do not "
+					"plan any more checks", task->message_id,
+					checkpoint->lim);
+			return TRUE;
 		}
-	}
-
-	if (idx >= (checkpoint->offset + 1) * NBBY) {
-		checkpoint->offset ++;
-	}
 
-	g_assert (idx >= 0 && idx < (gint)cache->items_by_order->len);
-	item = g_ptr_array_index (cache->items_by_order, idx);
+		item = g_ptr_array_index (cache->items_by_order, i);
+		if (!isset (checkpoint->processed_bits, i)) {
+			if (item->type == SYMBOL_TYPE_NORMAL || item->type == SYMBOL_TYPE_CALLBACK) {
+				g_assert (item->func != NULL);
+				t1 = rspamd_get_ticks ();
+
+				if (item->symbol != NULL &&
+					G_UNLIKELY (check_debug_symbol (task->cfg, item->symbol))) {
+					rspamd_log_debug (rspamd_main->logger);
+					item->func (task, item->user_data);
+					rspamd_log_nodebug (rspamd_main->logger);
+				}
+				else {
+					item->func (task, item->user_data);
+				}
 
-	if (!item) {
-		return FALSE;
-	}
+				t2 = rspamd_get_ticks ();
 
-	if (item->type == SYMBOL_TYPE_NORMAL || item->type == SYMBOL_TYPE_CALLBACK) {
-		g_assert (item->func != NULL);
-		t1 = rspamd_get_ticks ();
+				diff = (t2 - t1) * 1000000;
+				rspamd_set_counter (item, diff);
+			}
 
-		if (item->symbol != NULL &&
-			G_UNLIKELY (check_debug_symbol (task->cfg, item->symbol))) {
-			rspamd_log_debug (rspamd_main->logger);
-			item->func (task, item->user_data);
-			rspamd_log_nodebug (rspamd_main->logger);
-		}
-		else {
-			item->func (task, item->user_data);
+			setbit (checkpoint->processed_bits, i);
+			checkpoint->processed_num ++;
 		}
-
-		t2 = rspamd_get_ticks ();
-
-		diff = (t2 - t1) * 1000000;
-		rspamd_set_counter (item, diff);
 	}
 
-	clrbit (checkpoint->processed_bits, idx);
-	checkpoint->processed_num ++;
-
 	return TRUE;
 }
 
diff --git a/src/libserver/symbols_cache.h b/src/libserver/symbols_cache.h
index 014a825dc..bf60e9aba 100644
--- a/src/libserver/symbols_cache.h
+++ b/src/libserver/symbols_cache.h
@@ -136,9 +136,8 @@ gint rspamd_symbols_cache_add_symbol (struct symbols_cache *cache,
  * @param cache symbols cache
  * @param saved_item pointer to currently saved item
  */
-gboolean rspamd_symbols_cache_process_symbol (struct rspamd_task *task,
-	struct symbols_cache *cache,
-	gpointer *save);
+gboolean rspamd_symbols_cache_process_symbols (struct rspamd_task *task,
+	struct symbols_cache *cache);
 
 /**
  * Validate cache items agains theirs weights defined in metrics
diff --git a/src/libserver/task.c b/src/libserver/task.c
index d5a7243a3..901fc1963 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -311,99 +311,11 @@ rspamd_task_select_processing_stage (struct rspamd_task *task, guint stages)
 	return RSPAMD_TASK_STAGE_DONE;
 }
 
-static gboolean
-check_metric_settings (struct rspamd_task *task, struct metric *metric,
-	double *score)
-{
-	const ucl_object_t *mobj, *reject, *act;
-	double val;
-
-	if (task->settings == NULL) {
-		return FALSE;
-	}
-
-	mobj = ucl_object_find_key (task->settings, metric->name);
-	if (mobj != NULL) {
-		act = ucl_object_find_key (mobj, "actions");
-		if (act != NULL) {
-			reject = ucl_object_find_key (act,
-				rspamd_action_to_str (METRIC_ACTION_REJECT));
-			if (reject != NULL && ucl_object_todouble_safe (reject, &val)) {
-				*score = val;
-				return TRUE;
-			}
-		}
-	}
-
-	return FALSE;
-}
-
-/* Return true if metric has score that is more than spam score for it */
-static gboolean
-check_metric_is_spam (struct rspamd_task *task, struct metric *metric)
-{
-	struct metric_result *res;
-	double ms;
-
-	res = g_hash_table_lookup (task->results, metric->name);
-	if (res) {
-		if (!check_metric_settings (task, metric, &ms)) {
-			ms = metric->actions[METRIC_ACTION_REJECT].score;
-		}
-		return (ms > 0 && res->score >= ms);
-	}
-
-	return FALSE;
-}
-
 static gboolean
 rspamd_process_filters (struct rspamd_task *task)
 {
-	GList *cur;
-	struct metric *metric;
-	gpointer item = NULL;
-
-	/* Insert default metric to be sure that it exists all the time */
-
-	if (task->checkpoint == NULL) {
-		rspamd_create_metric_result (task, DEFAULT_METRIC);
-		if (task->settings) {
-			const ucl_object_t *wl;
-
-			wl = ucl_object_find_key (task->settings, "whitelist");
-			if (wl != NULL) {
-				msg_info ("<%s> is whitelisted", task->message_id);
-				task->flags |= RSPAMD_TASK_FLAG_SKIP;
-				return TRUE;
-			}
-		}
-
-		task->checkpoint = GUINT_TO_POINTER (0x1);
-	}
-	else {
-		/* TODO: in future, we need to add dependencies here */
-		return TRUE;
-	}
 
-	/* Process metrics symbols */
-	while (rspamd_symbols_cache_process_symbol (task, task->cfg->cache, &item)) {
-		/* Check reject actions */
-		cur = task->cfg->metrics_list;
-		while (cur) {
-			metric = cur->data;
-			if (!(task->flags & RSPAMD_TASK_FLAG_PASS_ALL) &&
-				metric->actions[METRIC_ACTION_REJECT].score > 0 &&
-				check_metric_is_spam (task, metric)) {
-				msg_info ("<%s> has already scored more than %.2f, so do not "
-					"plan any more checks", task->message_id,
-					metric->actions[METRIC_ACTION_REJECT].score);
-				return TRUE;
-			}
-			cur = g_list_next (cur);
-		}
-	}
-
-	return TRUE;
+	return rspamd_symbols_cache_process_symbols (task, task->cfg->cache);
 }
 
 gboolean
-- 
2.39.5