From: Vsevolod Stakhov Date: Tue, 14 Jul 2009 13:09:49 +0000 (+0400) Subject: * Fix symbol planning, add cache_file directive to config file X-Git-Tag: 0.2.7~88 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=5f5254ff214fd77a6fdc8cce9269c29837fd162a;p=rspamd.git * Fix symbol planning, add cache_file directive to config file * Fix some errors --- diff --git a/src/cfg_file.l b/src/cfg_file.l index 06e1a33e7..7f2931ccc 100644 --- a/src/cfg_file.l +++ b/src/cfg_file.l @@ -58,6 +58,7 @@ metric return METRIC; name return NAME; required_score return REQUIRED_SCORE; function return FUNCTION; +cache_file return CACHE_FILE; control return CONTROL; password return PASSWORD; lmtp return LMTP; diff --git a/src/cfg_file.y b/src/cfg_file.y index af54a9fb4..7a2aa3cb7 100644 --- a/src/cfg_file.y +++ b/src/cfg_file.y @@ -49,7 +49,7 @@ struct rspamd_view *cur_view = NULL; %token READ_SERVERS WRITE_SERVER DIRECTORY_SERVERS MAILBOX_QUERY USERS_QUERY LASTLOGIN_QUERY %token MEMCACHED WORKER TYPE REQUIRE MODULE %token MODULE_OPT PARAM VARIABLE -%token FILTERS FACTORS METRIC NAME +%token FILTERS FACTORS METRIC NAME CACHE_FILE %token REQUIRED_SCORE FUNCTION FRACT COMPOSITES CONTROL PASSWORD %token LOGGING LOG_TYPE LOG_TYPE_CONSOLE LOG_TYPE_SYSLOG LOG_TYPE_FILE %token LOG_LEVEL LOG_LEVEL_DEBUG LOG_LEVEL_INFO LOG_LEVEL_WARNING LOG_LEVEL_ERROR LOG_FACILITY LOG_FILENAME @@ -362,6 +362,7 @@ metriccmd: | metricfunction | metricscore | metricclassifier + | metriccache ; metricname: @@ -416,6 +417,15 @@ metricclassifier: } ; +metriccache: + CACHE_FILE EQSIGN QUOTEDSTRING { + if (cur_metric == NULL) { + cur_metric = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct metric)); + } + cur_metric->cache_filename = memory_pool_strdup (cfg->cfg_pool, $3); + } + ; + factors: FACTORS OBRACE factorsbody EBRACE ; diff --git a/src/cfg_utils.c b/src/cfg_utils.c index 5688d44a0..7307db803 100644 --- a/src/cfg_utils.c +++ b/src/cfg_utils.c @@ -171,7 +171,6 @@ parse_bind_line (struct config_file *cfg, struct worker_conf *cf, char *str) void init_defaults (struct config_file *cfg) { - struct metric *def_metric; cfg->memcached_error_time = DEFAULT_UPSTREAM_ERROR_TIME; cfg->memcached_dead_time = DEFAULT_UPSTREAM_DEAD_TIME; @@ -189,14 +188,6 @@ init_defaults (struct config_file *cfg) cfg->statfiles = g_hash_table_new (g_str_hash, g_str_equal); cfg->cfg_params = g_hash_table_new (g_str_hash, g_str_equal); - def_metric = memory_pool_alloc (cfg->cfg_pool, sizeof (struct metric)); - def_metric->name = "default"; - def_metric->func_name = "factors"; - def_metric->func = factor_consolidation_func; - def_metric->required_score = DEFAULT_SCORE; - def_metric->classifier = get_classifier ("winnow"); - g_hash_table_insert (cfg->metrics, "default", def_metric); - } void @@ -479,6 +470,7 @@ void post_load_config (struct config_file *cfg) { struct timespec ts; + struct metric *def_metric; g_hash_table_foreach (cfg->variables, substitute_all_variables, cfg); g_hash_table_foreach (cfg->modules_opts, substitute_module_variables, cfg); @@ -499,6 +491,18 @@ post_load_config (struct config_file *cfg) if (cfg->clock_res > 3) { cfg->clock_res = 3; } + + if (g_hash_table_lookup (cfg->metrics, DEFAULT_METRIC) == NULL) { + def_metric = memory_pool_alloc (cfg->cfg_pool, sizeof (struct metric)); + def_metric->name = DEFAULT_METRIC; + def_metric->func_name = "factors"; + def_metric->func = factor_consolidation_func; + def_metric->required_score = DEFAULT_SCORE; + def_metric->classifier = get_classifier ("winnow"); + cfg->metrics_list = g_list_prepend (cfg->metrics_list, def_metric); + g_hash_table_insert (cfg->metrics, DEFAULT_METRIC, def_metric); + } + } diff --git a/src/filter.c b/src/filter.c index 2bfd2bc36..a1894603d 100644 --- a/src/filter.c +++ b/src/filter.c @@ -50,6 +50,8 @@ insert_result (struct worker_task *task, const char *metric_name, const char *sy struct metric *metric; struct metric_result *metric_res; struct symbol *s; + struct cache_item *item; + int i; metric = g_hash_table_lookup (task->worker->srv->cfg->metrics, metric_name); if (metric == NULL) { @@ -95,6 +97,17 @@ insert_result (struct worker_task *task, const char *metric_name, const char *sy g_hash_table_insert (metric_res->symbols, (gpointer)symbol, s); } + + /* Process cache item */ + if (metric->cache) { + for (i = 0; i < metric->cache->used_items; i ++) { + item = &metric->cache->items[i]; + + if (flag > 0 && strcmp (item->s->symbol, symbol) == 0) { + item->s->frequency ++; + } + } + } } /* @@ -219,6 +232,7 @@ check_metric_is_spam (struct worker_task *task, struct metric *metric) res = g_hash_table_lookup (task->results, metric->name); if (res) { + metric_process_callback_forced (metric->name, res, task); return res->score >= metric->required_score; } diff --git a/src/main.c b/src/main.c index beefabdcb..72d5b1d62 100644 --- a/src/main.c +++ b/src/main.c @@ -616,7 +616,9 @@ main (int argc, char **argv, char **env) l = g_list_first (cfg->metrics_list); while (l) { metric = l->data; - init_symbols_cache (cfg->cfg_pool, metric->cache, metric->cache_filename); + if (!init_symbols_cache (cfg->cfg_pool, metric->cache, metric->cache_filename)) { + exit (EXIT_FAILURE); + } l = g_list_next (l); } diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c index 19c46c7b7..951285070 100644 --- a/src/plugins/chartable.c +++ b/src/plugins/chartable.c @@ -108,10 +108,10 @@ chartable_module_config (struct config_file *cfg) /* Search in factors hash table */ w = g_hash_table_lookup (cfg->factors, chartable_module_ctx->symbol); if (w == NULL) { - register_symbol (metric->cache, chartable_module_ctx->symbol, 1, chartable_symbol_callback, NULL); + register_symbol (&metric->cache, chartable_module_ctx->symbol, 1, chartable_symbol_callback, NULL); } else { - register_symbol (metric->cache, chartable_module_ctx->symbol, *w, chartable_symbol_callback, NULL); + register_symbol (&metric->cache, chartable_module_ctx->symbol, *w, chartable_symbol_callback, NULL); } return res; diff --git a/src/plugins/emails.c b/src/plugins/emails.c index 6b789916b..ef886ecf5 100644 --- a/src/plugins/emails.c +++ b/src/plugins/emails.c @@ -116,10 +116,10 @@ emails_module_config (struct config_file *cfg) /* Search in factors hash table */ w = g_hash_table_lookup (cfg->factors, email_module_ctx->symbol); if (w == NULL) { - register_symbol (metric->cache, email_module_ctx->symbol, 1, emails_symbol_callback, NULL); + register_symbol (&metric->cache, email_module_ctx->symbol, 1, emails_symbol_callback, NULL); } else { - register_symbol (metric->cache, email_module_ctx->symbol, *w, emails_symbol_callback, NULL); + register_symbol (&metric->cache, email_module_ctx->symbol, *w, emails_symbol_callback, NULL); } return res; diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index 7b123dd33..23ec3908c 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -194,10 +194,10 @@ regexp_module_config (struct config_file *cfg) /* Search in factors hash table */ w = g_hash_table_lookup (cfg->factors, cur->param); if (w == NULL) { - register_symbol (metric->cache, cur->param, 1, process_regexp_item, cur_item); + register_symbol (&metric->cache, cur->param, 1, process_regexp_item, cur_item); } else { - register_symbol (metric->cache, cur->param, *w, process_regexp_item, cur_item); + register_symbol (&metric->cache, cur->param, *w, process_regexp_item, cur_item); } cur_opt = g_list_next (cur_opt); diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 691c57681..15bdc149b 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -195,10 +195,10 @@ surbl_module_config (struct config_file *cfg) /* Search in factors hash table */ w = g_hash_table_lookup (cfg->factors, new_suffix->symbol); if (w == NULL) { - register_symbol (metric->cache, new_suffix->symbol, 1, surbl_test_url, NULL); + register_symbol (&metric->cache, new_suffix->symbol, 1, surbl_test_url, new_suffix); } else { - register_symbol (metric->cache, new_suffix->symbol, *w, surbl_test_url, NULL); + register_symbol (&metric->cache, new_suffix->symbol, *w, surbl_test_url, new_suffix); } } } @@ -227,10 +227,10 @@ surbl_module_config (struct config_file *cfg) surbl_module_ctx->suffixes = g_list_prepend (surbl_module_ctx->suffixes, new_suffix); w = g_hash_table_lookup (cfg->factors, new_suffix->symbol); if (w == NULL) { - register_symbol (metric->cache, new_suffix->symbol, 1, surbl_test_url, new_suffix); + register_symbol (&metric->cache, new_suffix->symbol, 1, surbl_test_url, new_suffix); } else { - register_symbol (metric->cache, new_suffix->symbol, *w, surbl_test_url, new_suffix); + register_symbol (&metric->cache, new_suffix->symbol, *w, surbl_test_url, new_suffix); } } diff --git a/src/symbols_cache.c b/src/symbols_cache.c index 60cdd2644..5ed0a92d9 100644 --- a/src/symbols_cache.c +++ b/src/symbols_cache.c @@ -46,7 +46,7 @@ int cache_cmp (const void *p1, const void *p2) { const struct cache_item *i1 = p1, *i2 = p2; - + return strcmp (i1->s->symbol, i2->s->symbol); } @@ -70,22 +70,20 @@ static void grow_cache (struct symbols_cache *cache) { guint old = cache->cur_items, i; + void *new; cache->cur_items = cache->cur_items * 2; - cache->items = g_renew (struct cache_item, cache->items, cache->cur_items); + new = g_new0 (struct cache_item, cache->cur_items); + memcpy (new, cache->items, old * sizeof (struct cache_item)); + g_free (cache->items); + cache->items = new; + /* Create new saved_cache_items */ for (i = old - 1; i < cache->cur_items; i ++) { - cache->items[i].s = g_malloc (sizeof (struct saved_cache_item)); + cache->items[i].s = g_new0 (struct saved_cache_item, 1); } } -static void -truncate_cache (struct symbols_cache *cache) -{ - cache->items = g_renew (struct cache_item, cache->items, cache->used_items); - cache->cur_items = cache->used_items; -} - static GChecksum * get_mem_cksum (struct symbols_cache *cache) { @@ -186,41 +184,37 @@ create_cache_file (struct symbols_cache *cache, const char *filename, int fd) } void -register_symbol (struct symbols_cache *cache, const char *name, double weight, symbol_func_t func, gpointer user_data) +register_symbol (struct symbols_cache **cache, const char *name, double weight, symbol_func_t func, gpointer user_data) { struct cache_item *item = NULL; int i; - if (cache == NULL) { - cache = g_new0 (struct symbols_cache, 1); + if (*cache == NULL) { + *cache = g_new0 (struct symbols_cache, 1); } - if (cache->items == NULL) { - cache->cur_items = MIN_CACHE; - cache->used_items = 0; - cache->items = g_new0 (struct cache_item, cache->cur_items); - for (i = 0; i < cache->cur_items; i ++) { - cache->items[i].s = g_malloc (sizeof (struct saved_cache_item)); - } - } - - for (i = 0; i < cache->cur_items; i ++) { - if (cache->items[i].s->symbol[0] != '\0') { - item = &cache->items[i]; + if ((*cache)->items == NULL) { + (*cache)->cur_items = MIN_CACHE; + (*cache)->used_items = 0; + (*cache)->items = g_new0 (struct cache_item, (*cache)->cur_items); + for (i = 0; i < (*cache)->cur_items; i ++) { + (*cache)->items[i].s = g_new0 (struct saved_cache_item, 1); } } - if (item == NULL) { - grow_cache (cache); + if ((*cache)->used_items >= (*cache)->cur_items) { + grow_cache (*cache); /* Call once more */ register_symbol (cache, name, weight, func, user_data); return; } + item = &(*cache)->items[(*cache)->used_items]; + g_strlcpy (item->s->symbol, name, sizeof (item->s->symbol)); item->func = func; item->user_data = user_data; item->s->weight = weight; - cache->used_items ++; + (*cache)->used_items ++; set_counter (item->s->symbol, 0); } @@ -237,7 +231,6 @@ init_symbols_cache (memory_pool_t *pool, struct symbols_cache *cache, const char return FALSE; } - truncate_cache (cache); /* Sort items in cache */ qsort (cache->items, cache->used_items, sizeof (struct cache_item), cache_cmp); @@ -288,7 +281,7 @@ init_symbols_cache (memory_pool_t *pool, struct symbols_cache *cache, const char g_checksum_get_digest (cksum, mem_sum, &cklen); /* Now try to read file sum */ - if (lseek (fd, SEEK_END, -(cklen)) == -1) { + if (lseek (fd, -(cklen), SEEK_END) == -1) { close (fd); g_free (mem_sum); g_checksum_free (cksum); @@ -340,6 +333,7 @@ call_symbol_callback (struct worker_task *task, struct symbols_cache *cache, str return FALSE; } if (cache->uses ++ >= MAX_USES) { + msg_info ("call_symbols_callback: resort symbols cache"); memory_pool_wlock_rwlock (cache->lock); cache->uses = 0; /* Resort while having write lock */ @@ -350,7 +344,7 @@ call_symbol_callback (struct worker_task *task, struct symbols_cache *cache, str } else { /* Next pointer */ - if (*saved_item - cache->items == cache->used_items) { + if (*saved_item - cache->items >= cache->used_items - 1) { /* No more items in cache */ return FALSE; } @@ -358,7 +352,6 @@ call_symbol_callback (struct worker_task *task, struct symbols_cache *cache, str item = *saved_item + 1; memory_pool_runlock_rwlock (cache->lock); } - if (check_view (task->cfg->views, item->s->symbol, task)) { #ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts1); @@ -379,7 +372,6 @@ call_symbol_callback (struct worker_task *task, struct symbols_cache *cache, str diff = (ts2.tv_sec - ts1.tv_sec) * 1000000 + (ts2.tv_nsec - ts1.tv_nsec) / 1000; item->s->avg_time = set_counter (item->s->symbol, diff); - item->s->frequency ++; } *saved_item = item; diff --git a/src/symbols_cache.h b/src/symbols_cache.h index e8e0be24f..65028cfcc 100644 --- a/src/symbols_cache.h +++ b/src/symbols_cache.h @@ -41,7 +41,7 @@ gboolean init_symbols_cache (memory_pool_t *pool, struct symbols_cache *cache, c * @param func pointer to handler * @param user_data pointer to user_data */ -void register_symbol (struct symbols_cache *cache, const char *name, double weight, symbol_func_t func, gpointer user_data); +void register_symbol (struct symbols_cache **cache, const char *name, double weight, symbol_func_t func, gpointer user_data); /** * Call function for cached symbol using saved callback