source.dussan.org Git - rspamd.git/commitdiff
Rework filters processing.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 7 Jun 2015 15:13:09 +0000 (16:13 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 7 Jun 2015 15:13:09 +0000 (16:13 +0100)
src/libserver/symbols_cache.c
src/libserver/symbols_cache.h
src/libserver/task.c

diff --git a/src/libserver/symbols_cache.c b/src/libserver/symbols_cache.c
index 32ed6f786f2a025b19ad24f3e13343d7f3e30857..e5a140e4e98a9a078e1b35a0dc590010be79eae7 100644 (file)
@@ -112,7 +112,12 @@ cache_logic_cmp (const void *p1, const void *p2, gpointer ud)
        double weight1, weight2;
        double f1 = 0, f2 = 0, t1, t2;
 
-       if (i1->priority == i2->priority) {
+       if (i1->deps->len != 0 || i2->deps->len != 0) {
+               /* TODO: handle complex dependencies */
+               w1 = -(i1->deps->len);
+               w2 = -(i2->deps->len);
+       }
+       else if (i1->priority == i2->priority) {
                f1 = (double)i1->frequency / (double)cache->total_freq;
                f2 = (double)i2->frequency / (double)cache->total_freq;
                weight1 = abs (i1->weight) / cache->max_weight;
@@ -770,12 +775,86 @@ struct cache_savepoint {
        guint processed_num;
        guint pass;
        gint offset;
+       struct metric_result *rs;
+       gdouble lim;
+       GPtrArray *waitq;
 };
 
+
+static gboolean
+check_metric_settings (struct rspamd_task *task, struct metric *metric,
+       double *score)
+{
+       const ucl_object_t *mobj, *reject, *act;
+       double val;
+
+       if (task->settings == NULL) {
+               return FALSE;
+       }
+
+       mobj = ucl_object_find_key (task->settings, metric->name);
+       if (mobj != NULL) {
+               act = ucl_object_find_key (mobj, "actions");
+               if (act != NULL) {
+                       reject = ucl_object_find_key (act,
+                                       rspamd_action_to_str (METRIC_ACTION_REJECT));
+                       if (reject != NULL && ucl_object_todouble_safe (reject, &val)) {
+                               *score = val;
+                               return TRUE;
+                       }
+               }
+       }
+
+       return FALSE;
+}
+
+/* Return true if metric has score that is more than spam score for it */
+static gboolean
+rspamd_symbols_cache_metric_limit (struct rspamd_task *task,
+               struct cache_savepoint *cp)
+{
+       struct metric_result *res;
+       GList *cur;
+       struct metric *metric;
+       double ms;
+
+       cur = task->cfg->metrics_list;
+
+       if (cp->lim == 0.0) {
+               /*
+                * Look for metric that has the maximum reject score
+                */
+               while (cur) {
+                       metric = cur->data;
+                       res = g_hash_table_lookup (task->results, metric->name);
+
+                       if (res) {
+                               if (!check_metric_settings (task, metric, &ms)) {
+                                       ms = metric->actions[METRIC_ACTION_REJECT].score;
+                               }
+
+                               if (cp->lim < ms) {
+                                       cp->rs = res;
+                                       cp->lim = ms;
+                               }
+                       }
+
+                       cur = g_list_next (cur);
+               }
+       }
+
+       g_assert (cp->rs != NULL);
+
+       if (cp->rs->score > cp->lim) {
+               return TRUE;
+       }
+
+       return FALSE;
+}
+
 gboolean
-rspamd_symbols_cache_process_symbol (struct rspamd_task * task,
-       struct symbols_cache * cache,
-       gpointer *save)
+rspamd_symbols_cache_process_symbols (struct rspamd_task * task,
+       struct symbols_cache * cache)
 {
        double t1, t2;
        guint64 diff;
@@ -784,70 +863,74 @@ rspamd_symbols_cache_process_symbol (struct rspamd_task * task,
        gint idx = -1, i;
 
        g_assert (cache != NULL);
-       g_assert (save != NULL);
 
-       if (*save == NULL) {
-               checkpoint = rspamd_mempool_alloc (task->task_pool, sizeof (*checkpoint));
+       if (task->checkpoint == NULL) {
+               checkpoint = rspamd_mempool_alloc0 (task->task_pool, sizeof (*checkpoint));
                checkpoint->processed_bits = rspamd_mempool_alloc (task->task_pool,
                                NBYTES (cache->used_items));
                /* Inverse to use ffs */
-               memset (checkpoint->processed_bits, 0xff, NBYTES (cache->used_items));
-               checkpoint->processed_num = 0;
-               checkpoint->pass = 0;
-               checkpoint->offset = 0;
-               *save = checkpoint;
+               memset (checkpoint->processed_bits, 0x0, NBYTES (cache->used_items));
+               checkpoint->waitq = g_ptr_array_new ();
+               rspamd_mempool_add_destructor (task->task_pool,
+                               rspamd_ptr_array_free_hard, checkpoint->waitq);
+               task->checkpoint = checkpoint;
+
+               rspamd_create_metric_result (task, DEFAULT_METRIC);
+               if (task->settings) {
+                       const ucl_object_t *wl;
+
+                       wl = ucl_object_find_key (task->settings, "whitelist");
+                       if (wl != NULL) {
+                               msg_info ("<%s> is whitelisted", task->message_id);
+                               task->flags |= RSPAMD_TASK_FLAG_SKIP;
+                               return TRUE;
+                       }
+               }
        }
        else {
-               checkpoint = *save;
+               checkpoint = task->checkpoint;
        }
 
        if (checkpoint->processed_num >= cache->used_items) {
                /* All symbols are processed */
-               return FALSE;
+               return TRUE;
        }
 
-       /* TODO: too slow approach */
        for (i = checkpoint->offset * NBBY; i < (gint)cache->used_items; i ++) {
-               if (isset (checkpoint->processed_bits, i)) {
-                       idx = i;
-                       break;
+               if (rspamd_symbols_cache_metric_limit (task, checkpoint)) {
+                       msg_info ("<%s> has already scored more than %.2f, so do not "
+                                       "plan any more checks", task->message_id,
+                                       checkpoint->rs->score);
+                       return TRUE;
                }
-       }
-
-       if (idx >= (checkpoint->offset + 1) * NBBY) {
-               checkpoint->offset ++;
-       }
 
-       g_assert (idx >= 0 && idx < (gint)cache->items_by_order->len);
-       item = g_ptr_array_index (cache->items_by_order, idx);
+               item = g_ptr_array_index (cache->items_by_order, idx);
+               if (!isset (checkpoint->processed_bits, i)) {
+                       if (item->type == SYMBOL_TYPE_NORMAL || item->type == SYMBOL_TYPE_CALLBACK) {
+                               g_assert (item->func != NULL);
+                               t1 = rspamd_get_ticks ();
+
+                               if (item->symbol != NULL &&
+                                               G_UNLIKELY (check_debug_symbol (task->cfg, item->symbol))) {
+                                       rspamd_log_debug (rspamd_main->logger);
+                                       item->func (task, item->user_data);
+                                       rspamd_log_nodebug (rspamd_main->logger);
+                               }
+                               else {
+                                       item->func (task, item->user_data);
+                               }
 
-       if (!item) {
-               return FALSE;
-       }
+                               t2 = rspamd_get_ticks ();
 
-       if (item->type == SYMBOL_TYPE_NORMAL || item->type == SYMBOL_TYPE_CALLBACK) {
-               g_assert (item->func != NULL);
-               t1 = rspamd_get_ticks ();
+                               diff = (t2 - t1) * 1000000;
+                               rspamd_set_counter (item, diff);
+                       }
 
-               if (item->symbol != NULL &&
-                               G_UNLIKELY (check_debug_symbol (task->cfg, item->symbol))) {
-                       rspamd_log_debug (rspamd_main->logger);
-                       item->func (task, item->user_data);
-                       rspamd_log_nodebug (rspamd_main->logger);
-               }
-               else {
-                       item->func (task, item->user_data);
+                       setbit (checkpoint->processed_bits, idx);
+                       checkpoint->processed_num ++;
                }
-
-               t2 = rspamd_get_ticks ();
-
-               diff = (t2 - t1) * 1000000;
-               rspamd_set_counter (item, diff);
        }
 
-       clrbit (checkpoint->processed_bits, idx);
-       checkpoint->processed_num ++;
-
        return TRUE;
 }
 
diff --git a/src/libserver/symbols_cache.h b/src/libserver/symbols_cache.h
index 014a825dc925438412eec46d52524cfee31c4635..bf60e9aba8498d32368d5cf537fff91c4b4605be 100644 (file)
@@ -136,9 +136,8 @@ gint rspamd_symbols_cache_add_symbol (struct symbols_cache *cache,
  * @param cache symbols cache
  * @param saved_item pointer to currently saved item
  */
-gboolean rspamd_symbols_cache_process_symbol (struct rspamd_task *task,
-       struct symbols_cache *cache,
-       gpointer *save);
+gboolean rspamd_symbols_cache_process_symbols (struct rspamd_task *task,
+       struct symbols_cache *cache);
 
 /**
  * Validate cache items agains theirs weights defined in metrics
diff --git a/src/libserver/task.c b/src/libserver/task.c
index d5a7243a3fe958e9464afe2dbdaac0a62ea68659..901fc1963b23423430abe78719347265801580e0 100644 (file)
@@ -311,99 +311,11 @@ rspamd_task_select_processing_stage (struct rspamd_task *task, guint stages)
        return RSPAMD_TASK_STAGE_DONE;
 }
 
-static gboolean
-check_metric_settings (struct rspamd_task *task, struct metric *metric,
-       double *score)
-{
-       const ucl_object_t *mobj, *reject, *act;
-       double val;
-
-       if (task->settings == NULL) {
-               return FALSE;
-       }
-
-       mobj = ucl_object_find_key (task->settings, metric->name);
-       if (mobj != NULL) {
-               act = ucl_object_find_key (mobj, "actions");
-               if (act != NULL) {
-                       reject = ucl_object_find_key (act,
-                                       rspamd_action_to_str (METRIC_ACTION_REJECT));
-                       if (reject != NULL && ucl_object_todouble_safe (reject, &val)) {
-                               *score = val;
-                               return TRUE;
-                       }
-               }
-       }
-
-       return FALSE;
-}
-
-/* Return true if metric has score that is more than spam score for it */
-static gboolean
-check_metric_is_spam (struct rspamd_task *task, struct metric *metric)
-{
-       struct metric_result *res;
-       double ms;
-
-       res = g_hash_table_lookup (task->results, metric->name);
-       if (res) {
-               if (!check_metric_settings (task, metric, &ms)) {
-                       ms = metric->actions[METRIC_ACTION_REJECT].score;
-               }
-               return (ms > 0 && res->score >= ms);
-       }
-
-       return FALSE;
-}
-
 static gboolean
 rspamd_process_filters (struct rspamd_task *task)
 {
-       GList *cur;
-       struct metric *metric;
-       gpointer item = NULL;
-
-       /* Insert default metric to be sure that it exists all the time */
-
-       if (task->checkpoint == NULL) {
-               rspamd_create_metric_result (task, DEFAULT_METRIC);
-               if (task->settings) {
-                       const ucl_object_t *wl;
-
-                       wl = ucl_object_find_key (task->settings, "whitelist");
-                       if (wl != NULL) {
-                               msg_info ("<%s> is whitelisted", task->message_id);
-                               task->flags |= RSPAMD_TASK_FLAG_SKIP;
-                               return TRUE;
-                       }
-               }
-
-               task->checkpoint = GUINT_TO_POINTER (0x1);
-       }
-       else {
-               /* TODO: in future, we need to add dependencies here */
-               return TRUE;
-       }
-
        /* Process metrics symbols */
-       while (rspamd_symbols_cache_process_symbol (task, task->cfg->cache, &item)) {
-               /* Check reject actions */
-               cur = task->cfg->metrics_list;
-               while (cur) {
-                       metric = cur->data;
-                       if (!(task->flags & RSPAMD_TASK_FLAG_PASS_ALL) &&
-                               metric->actions[METRIC_ACTION_REJECT].score > 0 &&
-                               check_metric_is_spam (task, metric)) {
-                               msg_info ("<%s> has already scored more than %.2f, so do not "
-                                               "plan any more checks", task->message_id,
-                                               metric->actions[METRIC_ACTION_REJECT].score);
-                               return TRUE;
-                       }
-                       cur = g_list_next (cur);
-               }
-       }
-
-       return TRUE;
+       return rspamd_symbols_cache_process_symbols (task, task->cfg->cache);
 }
 
 gboolean