source.dussan.org Git - rspamd.git/commitdiff
[Rework] Further fixes to symbols frequencies
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 24 Jan 2017 15:47:51 +0000 (15:47 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 24 Jan 2017 15:47:51 +0000 (15:47 +0000)
src/client/rspamc.c
src/controller.c
src/libserver/symbols_cache.c
src/libserver/symbols_cache.h

index 764dd93765b9be8143998cbe3c01d7862697212d..dac493d8ce5a306a925b487438d2a5f7b7993584 100644 (file)
@@ -837,7 +837,7 @@ rspamc_counters_output (FILE *out, ucl_object_t *obj)
                printf ("\033[0m");
        }
        rspamd_snprintf (fmt_buf, sizeof (fmt_buf),
-               "| %%3d | %%%ds | %%6.1f | %%9d | %%9.3f |\n", max_len);
+               "| %%3d | %%%ds | %%6.1f | %%9.3f | %%9.3f |\n", max_len);
 
        iter = NULL;
        i = 0;
@@ -851,7 +851,7 @@ rspamc_counters_output (FILE *out, ucl_object_t *obj)
                        printf (fmt_buf, i,
                                ucl_object_tostring (sym),
                                ucl_object_todouble (weight),
-                               (gint)ucl_object_toint (freq),
+                               ucl_object_todouble (freq),
                                ucl_object_todouble (tim));
                }
                i++;
index 599cea35c1ceb7b844b87bed9261fde036f036e7..b5af6adc9164a1e9d4a674a17a8dbb9691ae61c8 100644 (file)
@@ -776,8 +776,7 @@ rspamd_controller_handle_symbols (struct rspamd_http_connection_entry *conn_ent,
                group_symbols = ucl_object_typed_new (UCL_ARRAY);
 
                while (g_hash_table_iter_next (&sit, &k, &v)) {
-                       guint freq = 0;
-                       gdouble tm = 0.0;
+                       gdouble tm = 0.0, freq = 0;
 
                        sym = v;
                        sym_obj = ucl_object_typed_new (UCL_OBJECT);
@@ -796,7 +795,7 @@ rspamd_controller_handle_symbols (struct rspamd_http_connection_entry *conn_ent,
                        if (rspamd_symbols_cache_stat_symbol (session->ctx->cfg->cache,
                                        sym->name, &freq, &tm)) {
                                ucl_object_insert_key (sym_obj,
-                                               ucl_object_fromint (freq),
+                                               ucl_object_fromdouble (freq),
                                                "frequency", 0, false);
                                ucl_object_insert_key (sym_obj,
                                                ucl_object_fromdouble (tm),
index a4c5b605902b935a880e7afbdd5628d64b37d1e9..d490f594d17c2d9e7456ad8eed19d5aa20dd2694 100644 (file)
@@ -73,7 +73,7 @@ struct symbols_cache {
        guint64 cksum;
        gdouble total_weight;
        guint used_items;
-       gdouble total_freq;
+       guint64 total_hits;
        struct rspamd_config *cfg;
        rspamd_mempool_mutex_t *mtx;
        gdouble reload_time;
@@ -86,10 +86,12 @@ struct counter_data {
 };
 
 struct item_stat {
+       struct counter_data time_counter;
        gdouble avg_time;
        gdouble weight;
        guint hits;
        guint64 total_hits;
+       struct counter_data frequency_counter;
        gdouble avg_frequency;
        gdouble stddev_frequency;
 };
@@ -297,10 +299,10 @@ cache_logic_cmp (const void *p1, const void *p2, gpointer ud)
                                i2->symbol, w2 * 1000.0);
        }
        else if (i1->priority == i2->priority) {
-               avg_freq = (cache->total_freq / cache->used_items);
+               avg_freq = ((gdouble)cache->total_hits / cache->used_items);
                avg_weight = (cache->total_weight / cache->used_items);
-               f1 = (double)i1->st->hits / avg_freq;
-               f2 = (double)i2->st->hits / avg_freq;
+               f1 = (double)i1->st->total_hits / avg_freq;
+               f2 = (double)i2->st->total_hits / avg_freq;
                weight1 = fabs (i1->st->weight) / avg_weight;
                weight2 = fabs (i2->st->weight) / avg_weight;
                t1 = i1->st->avg_time;
@@ -334,10 +336,8 @@ cache_logic_cmp (const void *p1, const void *p2, gpointer ud)
  * Set counter for a symbol
  */
 static double
-rspamd_set_counter (struct cache_item *item, gdouble value)
+rspamd_set_counter (struct counter_data *cd, gdouble value)
 {
-       struct counter_data *cd;
-       cd = item->cd;
 
        /* Cumulative moving average using per-process counter data */
        if (cd->number == 0) {
@@ -356,18 +356,21 @@ rspamd_symbols_cache_resort (struct symbols_cache *cache)
 {
        struct symbols_cache_order *ord;
        guint i;
+       guint64 total_hits = 0;
        struct cache_item *it;
 
        ord = rspamd_symbols_cache_order_new (cache->used_items);
 
        for (i = 0; i < cache->used_items; i ++) {
                it = g_ptr_array_index (cache->items_by_id, i);
+               total_hits += it->st->total_hits;
 
                if (!(it->type & (SYMBOL_TYPE_PREFILTER|SYMBOL_TYPE_POSTFILTER|SYMBOL_TYPE_COMPOSITE))) {
                        g_ptr_array_add (ord->d, it);
                }
        }
 
+       cache->total_hits = total_hits;
        g_ptr_array_sort_with_data (ord->d, cache_logic_cmp, cache);
 
        if (cache->items_by_order) {
@@ -620,12 +623,11 @@ rspamd_symbols_cache_load_items (struct symbols_cache *cache, const gchar *name)
                                 * We maintain avg_time for virtual symbols equal to the
                                 * parent item avg_time
                                 */
-                               parent->st->avg_time = item->st->avg_time;
-                               parent->st->total_hits = item->st->total_hits;
+                               item->st->avg_time = parent->st->avg_time;
                        }
 
                        cache->total_weight += fabs (item->st->weight);
-                       cache->total_freq += item->st->hits;
+                       cache->total_hits += item->st->total_hits;
                }
        }
 
@@ -679,15 +681,15 @@ rspamd_symbols_cache_save_items (struct symbols_cache *cache, const gchar *name)
                elt = ucl_object_typed_new (UCL_OBJECT);
                ucl_object_insert_key (elt, ucl_object_fromdouble (item->st->weight),
                                "weight", 0, false);
-               ucl_object_insert_key (elt, ucl_object_fromdouble (item->st->avg_time),
+               ucl_object_insert_key (elt, ucl_object_fromdouble (item->st->time_counter.mean),
                                "time", 0, false);
                ucl_object_insert_key (elt, ucl_object_fromdouble (item->st->total_hits),
                                "count", 0, false);
 
                freq = ucl_object_typed_new (UCL_OBJECT);
-               ucl_object_insert_key (freq, ucl_object_fromdouble (item->st->avg_frequency),
+               ucl_object_insert_key (freq, ucl_object_fromdouble (item->st->frequency_counter.mean),
                                "avg", 0, false);
-               ucl_object_insert_key (freq, ucl_object_fromdouble (item->st->stddev_frequency),
+               ucl_object_insert_key (freq, ucl_object_fromdouble (item->st->frequency_counter.stddev),
                                "stddev", 0, false);
                ucl_object_insert_key (elt, freq, "frequency", 0, false);
 
@@ -933,7 +935,7 @@ rspamd_symbols_cache_new (struct rspamd_config *cfg)
        cache->composites = g_ptr_array_new ();
        cache->mtx = rspamd_mempool_get_mutex (cache->static_pool);
        cache->reload_time = CACHE_RELOAD_TIME;
-       cache->total_freq = 1;
+       cache->total_hits = 1;
        cache->total_weight = 1.0;
        cache->cfg = cfg;
        cache->cksum = 0xdeadbabe;
@@ -1282,7 +1284,7 @@ rspamd_symbols_cache_check_symbol (struct rspamd_task *task,
                        }
 
                        if (rspamd_worker_is_normal (task->worker)) {
-                               rspamd_set_counter (item, diff);
+                               rspamd_set_counter (item->cd, diff);
                        }
 
                        rspamd_session_watch_stop (task->s);
@@ -1873,14 +1875,40 @@ rspamd_symbols_cache_resort_cb (gint fd, short what, gpointer ud)
                /* Gather stats from shared execution times */
                for (i = 0; i < cache->items_by_id->len; i ++) {
                        item = g_ptr_array_index (cache->items_by_id, i);
-                       if (item->cd->number > 0) {
-                               item->st->total_hits += item->cd->number;
+                       if (item->st->hits > 0) {
+                               item->st->total_hits += item->st->hits;
+                               item->st->hits = 0;
+
+                               if (item->last_count > 0 && cbdata->w->index == 0) {
+                                       /* Calculate frequency */
+                                       gdouble cur_err, cur_value;
+
+                                       cur_value = (item->st->total_hits - item->last_count) /
+                                                       (cur_ticks - cbdata->last_resort);
+                                       rspamd_set_counter (&item->st->frequency_counter,
+                                                       cur_value);
+                                       item->st->avg_frequency = item->st->frequency_counter.mean;
+                                       item->st->stddev_frequency = item->st->frequency_counter.stddev;
+
+                                       cur_err = (item->st->avg_frequency - cur_value);
+                                       cur_err *= cur_err;
+
+                                       /*
+                                        * TODO: replace magic number
+                                        */
+                                       if (item->st->frequency_counter.number > 10 &&
+                                                       cur_err > item->st->stddev_frequency * 2) {
+                                               item->frequency_peaks ++;
+                                       }
+                               }
+
+                               item->last_count = item->st->total_hits;
 
                                if (item->type & (SYMBOL_TYPE_CALLBACK|SYMBOL_TYPE_NORMAL)) {
-                                       item->st->avg_time = item->st->avg_time +
-                                                       (item->cd->mean - item->st->avg_time) /
-                                                       (gdouble)item->st->total_hits;
-                                       item->cd->mean = item->st->avg_time;
+                                       rspamd_set_counter (&item->st->time_counter,
+                                                       item->st->avg_time);
+                                       memset (item->cd, 0, sizeof (*item->cd));
+                                       item->st->avg_time = item->st->time_counter.mean;
                                }
 
                                item->cd->number = item->st->total_hits;
@@ -1900,14 +1928,6 @@ rspamd_symbols_cache_resort_cb (gint fd, short what, gpointer ud)
                        }
                }
 
-               if (cbdata->w->index == 0) {
-                       /* We also calculate frequencies */
-                       for (i = 0; i < cache->items_by_id->len; i ++) {
-                               item = g_ptr_array_index (cache->items_by_id, i);
-
-                       }
-               }
-
                rspamd_mempool_unlock_mutex (cache->mtx);
        }
 
@@ -1929,6 +1949,7 @@ rspamd_symbols_cache_start_refresh (struct symbols_cache * cache,
        cbdata->w = w;
        cbdata->cache = cache;
        tm = rspamd_time_jitter (cache->reload_time, 0);
+       msg_debug_cache ("next reload in %.2f seconds", tm);
        g_assert (cache != NULL);
        evtimer_set (&cbdata->resort_ev, rspamd_symbols_cache_resort_cb, cbdata);
        event_base_set (ev_base, &cbdata->resort_ev);
@@ -1940,7 +1961,7 @@ void
 rspamd_symbols_cache_inc_frequency (struct symbols_cache *cache,
                const gchar *symbol)
 {
-       struct cache_item *item, *parent;
+       struct cache_item *item;
 
        g_assert (cache != NULL);
 
@@ -1948,13 +1969,6 @@ rspamd_symbols_cache_inc_frequency (struct symbols_cache *cache,
 
        if (item != NULL) {
                g_atomic_int_inc (&item->st->hits);
-               cache->total_freq ++;
-
-               /* For virtual symbols we also increase counter for parent */
-               if (item->parent != -1) {
-                       parent = g_ptr_array_index (cache->items_by_id, item->parent);
-                       g_atomic_int_inc (&parent->st->hits);
-               }
        }
 }
 
@@ -2015,7 +2029,7 @@ rspamd_symbols_cache_find_symbol (struct symbols_cache *cache, const gchar *name
 gboolean
 rspamd_symbols_cache_stat_symbol (struct symbols_cache *cache,
                const gchar *name,
-               guint *frequency,
+               gdouble *frequency,
                gdouble *tm)
 {
        struct cache_item *item;
@@ -2029,8 +2043,8 @@ rspamd_symbols_cache_stat_symbol (struct symbols_cache *cache,
        item = g_hash_table_lookup (cache->items_by_symbol, name);
 
        if (item != NULL) {
-               *frequency = item->st->hits;
-               *tm = item->st->avg_time;
+               *frequency = item->st->frequency_counter.mean;
+               *tm = item->st->time_counter.mean;
 
                return TRUE;
        }
index daecfaa2450f3c068b0d62c5115ec59d11f7244a..5755575ab978da082f8dafe05ea286239eb41f7b 100644 (file)
@@ -136,7 +136,7 @@ gint rspamd_symbols_cache_find_symbol (struct symbols_cache *cache,
  */
 gboolean rspamd_symbols_cache_stat_symbol (struct symbols_cache *cache,
                const gchar *name,
-               guint *frequency,
+               gdouble *frequency,
                gdouble *tm);
 /**
  * Find symbol in cache by its id