diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-01-24 15:47:51 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-01-24 15:47:51 +0000 |
commit | 06f9df52ebfe23b8017e9fd07750a849656328a9 (patch) | |
tree | abb80c3506c1632c6823258ce79bdb70477b516f /src/libserver | |
parent | 71fb2acea714e1da39a53e89e1950925b9b8337c (diff) | |
download | rspamd-06f9df52ebfe23b8017e9fd07750a849656328a9.tar.gz rspamd-06f9df52ebfe23b8017e9fd07750a849656328a9.zip |
[Rework] Further fixes to symbols frequencies
Diffstat (limited to 'src/libserver')
-rw-r--r-- | src/libserver/symbols_cache.c | 94 | ||||
-rw-r--r-- | src/libserver/symbols_cache.h | 2 |
2 files changed, 55 insertions, 41 deletions
diff --git a/src/libserver/symbols_cache.c b/src/libserver/symbols_cache.c index a4c5b6059..d490f594d 100644 --- a/src/libserver/symbols_cache.c +++ b/src/libserver/symbols_cache.c @@ -73,7 +73,7 @@ struct symbols_cache { guint64 cksum; gdouble total_weight; guint used_items; - gdouble total_freq; + guint64 total_hits; struct rspamd_config *cfg; rspamd_mempool_mutex_t *mtx; gdouble reload_time; @@ -86,10 +86,12 @@ struct counter_data { }; struct item_stat { + struct counter_data time_counter; gdouble avg_time; gdouble weight; guint hits; guint64 total_hits; + struct counter_data frequency_counter; gdouble avg_frequency; gdouble stddev_frequency; }; @@ -297,10 +299,10 @@ cache_logic_cmp (const void *p1, const void *p2, gpointer ud) i2->symbol, w2 * 1000.0); } else if (i1->priority == i2->priority) { - avg_freq = (cache->total_freq / cache->used_items); + avg_freq = ((gdouble)cache->total_hits / cache->used_items); avg_weight = (cache->total_weight / cache->used_items); - f1 = (double)i1->st->hits / avg_freq; - f2 = (double)i2->st->hits / avg_freq; + f1 = (double)i1->st->total_hits / avg_freq; + f2 = (double)i2->st->total_hits / avg_freq; weight1 = fabs (i1->st->weight) / avg_weight; weight2 = fabs (i2->st->weight) / avg_weight; t1 = i1->st->avg_time; @@ -334,10 +336,8 @@ cache_logic_cmp (const void *p1, const void *p2, gpointer ud) * Set counter for a symbol */ static double -rspamd_set_counter (struct cache_item *item, gdouble value) +rspamd_set_counter (struct counter_data *cd, gdouble value) { - struct counter_data *cd; - cd = item->cd; /* Cumulative moving average using per-process counter data */ if (cd->number == 0) { @@ -356,18 +356,21 @@ rspamd_symbols_cache_resort (struct symbols_cache *cache) { struct symbols_cache_order *ord; guint i; + guint64 total_hits = 0; struct cache_item *it; ord = rspamd_symbols_cache_order_new (cache->used_items); for (i = 0; i < cache->used_items; i ++) { it = g_ptr_array_index (cache->items_by_id, i); + total_hits += it->st->total_hits; if (!(it->type & (SYMBOL_TYPE_PREFILTER|SYMBOL_TYPE_POSTFILTER|SYMBOL_TYPE_COMPOSITE))) { g_ptr_array_add (ord->d, it); } } + cache->total_hits = total_hits; g_ptr_array_sort_with_data (ord->d, cache_logic_cmp, cache); if (cache->items_by_order) { @@ -620,12 +623,11 @@ rspamd_symbols_cache_load_items (struct symbols_cache *cache, const gchar *name) * We maintain avg_time for virtual symbols equal to the * parent item avg_time */ - parent->st->avg_time = item->st->avg_time; - parent->st->total_hits = item->st->total_hits; + item->st->avg_time = parent->st->avg_time; } cache->total_weight += fabs (item->st->weight); - cache->total_freq += item->st->hits; + cache->total_hits += item->st->total_hits; } } @@ -679,15 +681,15 @@ rspamd_symbols_cache_save_items (struct symbols_cache *cache, const gchar *name) elt = ucl_object_typed_new (UCL_OBJECT); ucl_object_insert_key (elt, ucl_object_fromdouble (item->st->weight), "weight", 0, false); - ucl_object_insert_key (elt, ucl_object_fromdouble (item->st->avg_time), + ucl_object_insert_key (elt, ucl_object_fromdouble (item->st->time_counter.mean), "time", 0, false); ucl_object_insert_key (elt, ucl_object_fromdouble (item->st->total_hits), "count", 0, false); freq = ucl_object_typed_new (UCL_OBJECT); - ucl_object_insert_key (freq, ucl_object_fromdouble (item->st->avg_frequency), + ucl_object_insert_key (freq, ucl_object_fromdouble (item->st->frequency_counter.mean), "avg", 0, false); - ucl_object_insert_key (freq, ucl_object_fromdouble (item->st->stddev_frequency), + ucl_object_insert_key (freq, ucl_object_fromdouble (item->st->frequency_counter.stddev), "stddev", 0, false); ucl_object_insert_key (elt, freq, "frequency", 0, false); @@ -933,7 +935,7 @@ rspamd_symbols_cache_new (struct rspamd_config *cfg) cache->composites = g_ptr_array_new (); cache->mtx = rspamd_mempool_get_mutex (cache->static_pool); cache->reload_time = CACHE_RELOAD_TIME; - cache->total_freq = 1; + cache->total_hits = 1; cache->total_weight = 1.0; cache->cfg = cfg; cache->cksum = 0xdeadbabe; @@ -1282,7 +1284,7 @@ rspamd_symbols_cache_check_symbol (struct rspamd_task *task, } if (rspamd_worker_is_normal (task->worker)) { - rspamd_set_counter (item, diff); + rspamd_set_counter (item->cd, diff); } rspamd_session_watch_stop (task->s); @@ -1873,14 +1875,40 @@ rspamd_symbols_cache_resort_cb (gint fd, short what, gpointer ud) /* Gather stats from shared execution times */ for (i = 0; i < cache->items_by_id->len; i ++) { item = g_ptr_array_index (cache->items_by_id, i); - if (item->cd->number > 0) { - item->st->total_hits += item->cd->number; + if (item->st->hits > 0) { + item->st->total_hits += item->st->hits; + item->st->hits = 0; + + if (item->last_count > 0 && cbdata->w->index == 0) { + /* Calculate frequency */ + gdouble cur_err, cur_value; + + cur_value = (item->st->total_hits - item->last_count) / + (cur_ticks - cbdata->last_resort); + rspamd_set_counter (&item->st->frequency_counter, + cur_value); + item->st->avg_frequency = item->st->frequency_counter.mean; + item->st->stddev_frequency = item->st->frequency_counter.stddev; + + cur_err = (item->st->avg_frequency - cur_value); + cur_err *= cur_err; + + /* + * TODO: replace magic number + */ + if (item->st->frequency_counter.number > 10 && + cur_err > item->st->stddev_frequency * 2) { + item->frequency_peaks ++; + } + } + + item->last_count = item->st->total_hits; if (item->type & (SYMBOL_TYPE_CALLBACK|SYMBOL_TYPE_NORMAL)) { - item->st->avg_time = item->st->avg_time + - (item->cd->mean - item->st->avg_time) / - (gdouble)item->st->total_hits; - item->cd->mean = item->st->avg_time; + rspamd_set_counter (&item->st->time_counter, + item->st->avg_time); + memset (item->cd, 0, sizeof (*item->cd)); + item->st->avg_time = item->st->time_counter.mean; } item->cd->number = item->st->total_hits; @@ -1900,14 +1928,6 @@ rspamd_symbols_cache_resort_cb (gint fd, short what, gpointer ud) } } - if (cbdata->w->index == 0) { - /* We also calculate frequencies */ - for (i = 0; i < cache->items_by_id->len; i ++) { - item = g_ptr_array_index (cache->items_by_id, i); - - } - } - rspamd_mempool_unlock_mutex (cache->mtx); } @@ -1929,6 +1949,7 @@ rspamd_symbols_cache_start_refresh (struct symbols_cache * cache, cbdata->w = w; cbdata->cache = cache; tm = rspamd_time_jitter (cache->reload_time, 0); + msg_debug_cache ("next reload in %.2f seconds", tm); g_assert (cache != NULL); evtimer_set (&cbdata->resort_ev, rspamd_symbols_cache_resort_cb, cbdata); event_base_set (ev_base, &cbdata->resort_ev); @@ -1940,7 +1961,7 @@ void rspamd_symbols_cache_inc_frequency (struct symbols_cache *cache, const gchar *symbol) { - struct cache_item *item, *parent; + struct cache_item *item; g_assert (cache != NULL); @@ -1948,13 +1969,6 @@ rspamd_symbols_cache_inc_frequency (struct symbols_cache *cache, if (item != NULL) { g_atomic_int_inc (&item->st->hits); - cache->total_freq ++; - - /* For virtual symbols we also increase counter for parent */ - if (item->parent != -1) { - parent = g_ptr_array_index (cache->items_by_id, item->parent); - g_atomic_int_inc (&parent->st->hits); - } } } @@ -2015,7 +2029,7 @@ rspamd_symbols_cache_find_symbol (struct symbols_cache *cache, const gchar *name gboolean rspamd_symbols_cache_stat_symbol (struct symbols_cache *cache, const gchar *name, - guint *frequency, + gdouble *frequency, gdouble *tm) { struct cache_item *item; @@ -2029,8 +2043,8 @@ rspamd_symbols_cache_stat_symbol (struct symbols_cache *cache, item = g_hash_table_lookup (cache->items_by_symbol, name); if (item != NULL) { - *frequency = item->st->hits; - *tm = item->st->avg_time; + *frequency = item->st->frequency_counter.mean; + *tm = item->st->time_counter.mean; return TRUE; } diff --git a/src/libserver/symbols_cache.h b/src/libserver/symbols_cache.h index daecfaa24..5755575ab 100644 --- a/src/libserver/symbols_cache.h +++ b/src/libserver/symbols_cache.h @@ -136,7 +136,7 @@ gint rspamd_symbols_cache_find_symbol (struct symbols_cache *cache, */ gboolean rspamd_symbols_cache_stat_symbol (struct symbols_cache *cache, const gchar *name, - guint *frequency, + gdouble *frequency, gdouble *tm); /** * Find symbol in cache by its id |