From f411d46ff24dc2bcc18fd2ea29f1f16612fc676d Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 22 Jan 2009 18:42:50 +0300 Subject: [PATCH] * Rework statfiles result processing * Fix small bug in protocol implementation (\r -> \r\n) --- rspamc.pl | 4 ++-- src/cfg_utils.c | 12 +++++++++++ src/classifiers/classifiers.c | 2 +- src/classifiers/classifiers.h | 2 ++ src/classifiers/winnow.c | 6 ++++++ src/filter.c | 40 ++++++++++++++++++++++++----------- src/protocol.c | 3 ++- src/worker.c | 2 ++ 8 files changed, 55 insertions(+), 16 deletions(-) diff --git a/rspamc.pl b/rspamc.pl index 46940987c..e04a5bfce 100755 --- a/rspamc.pl +++ b/rspamc.pl @@ -103,8 +103,8 @@ sub do_rspamc_command { syswrite $sock, "Content-Length: " . length ($input) . $CRLF . $CRLF; syswrite $sock, $input; syswrite $sock, $CRLF; - while (<$sock>) { - print $_; + while (defined (my $line = <$sock>)) { + print $line; } } diff --git a/src/cfg_utils.c b/src/cfg_utils.c index 2231a2d96..310c54fd2 100644 --- a/src/cfg_utils.c +++ b/src/cfg_utils.c @@ -18,12 +18,15 @@ #include "config.h" #include "cfg_file.h" #include "main.h" +#include "filter.h" #ifndef HAVE_OWN_QUEUE_H #include #else #include "queue.h" #endif +#define DEFAULT_SCORE 10.0 + extern int yylineno; extern char *yytext; @@ -163,6 +166,8 @@ parse_bind_line (struct config_file *cf, char *str, char is_control) void init_defaults (struct config_file *cfg) { + struct metric *def_metric; + cfg->memcached_error_time = DEFAULT_UPSTREAM_ERROR_TIME; cfg->memcached_dead_time = DEFAULT_UPSTREAM_DEAD_TIME; cfg->memcached_maxerrors = DEFAULT_UPSTREAM_MAXERRORS; @@ -179,6 +184,13 @@ init_defaults (struct config_file *cfg) cfg->statfiles = g_hash_table_new (g_str_hash, g_str_equal); cfg->cfg_params = g_hash_table_new (g_str_hash, g_str_equal); + def_metric = memory_pool_alloc (cfg->cfg_pool, sizeof (struct metric)); + def_metric->name = "default"; + def_metric->func_name = "factors"; + def_metric->func = factor_consolidation_func; + def_metric->required_score = DEFAULT_SCORE; + g_hash_table_insert (cfg->metrics, "default", def_metric); + LIST_INIT (&cfg->perl_modules); } diff --git a/src/classifiers/classifiers.c b/src/classifiers/classifiers.c index 5dab03122..aabdd7590 100644 --- a/src/classifiers/classifiers.c +++ b/src/classifiers/classifiers.c @@ -6,7 +6,7 @@ #include "classifiers.h" struct classifier classifiers[] = { - {"winnow", winnow_classify, winnow_learn }, + {"winnow", winnow_classify, winnow_learn, winnow_add_result }, }; struct classifier* diff --git a/src/classifiers/classifiers.h b/src/classifiers/classifiers.h index eb5e5de4e..71a08c684 100644 --- a/src/classifiers/classifiers.h +++ b/src/classifiers/classifiers.h @@ -16,6 +16,7 @@ struct classifier { char *name; double (*classify_func)(statfile_pool_t *pool, char *statfile, GTree *input); void (*learn_func)(statfile_pool_t *pool, char *statfile, GTree *input, int in_class); + double (*add_result_func)(double result, double new); }; /* Get classifier structure by name or return NULL if this name is not found */ @@ -23,6 +24,7 @@ struct classifier* get_classifier (char *name); /* Winnow algorithm */ double winnow_classify (statfile_pool_t *pool, char *statfile, GTree *input); void winnow_learn (statfile_pool_t *pool, char *statfile, GTree *input, int in_class); +double winnow_add_result (double result, double new); /* Array of all defined classifiers */ extern struct classifier classifiers[]; diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c index 552f054b2..29aa94899 100644 --- a/src/classifiers/winnow.c +++ b/src/classifiers/winnow.c @@ -105,3 +105,9 @@ winnow_learn (statfile_pool_t *pool, char *statfile, GTree *input, int in_class) statfile_pool_unlock_file (pool, statfile); } + +double +winnow_add_result (double result, double new) +{ + return result + new; +} diff --git a/src/filter.c b/src/filter.c index f45d718f2..d4953caf2 100644 --- a/src/filter.c +++ b/src/filter.c @@ -349,6 +349,12 @@ struct statfile_callback_data { struct worker_task *task; }; +struct statfile_result { + double weight; + GList *symbols; + struct classifier *classifier; +}; + static void statfiles_callback (gpointer key, gpointer value, void *arg) { @@ -357,7 +363,8 @@ statfiles_callback (gpointer key, gpointer value, void *arg) struct statfile *st = (struct statfile *)value; GTree *tokens = NULL; char *filename; - double weight, *w; + double weight; + struct statfile_result *res; GList *cur = NULL; GByteArray *content; f_str_t c; @@ -392,13 +399,16 @@ statfiles_callback (gpointer key, gpointer value, void *arg) msg_debug ("process_statfiles: got classify weight: %.2f", weight); if (weight > 0.000001) { - if ((w = g_hash_table_lookup (data->metrics, st->metric)) == NULL) { - w = memory_pool_alloc (task->task_pool, sizeof (double)); - *w = weight * st->weight; - g_hash_table_insert (data->metrics, st->metric, w); + + if ((res = g_hash_table_lookup (data->metrics, st->metric)) == NULL) { + res = memory_pool_alloc (task->task_pool, sizeof (struct statfile_result)); + res->symbols = g_list_prepend (NULL, st->alias); + res->weight = st->classifier->add_result_func (0, weight); + g_hash_table_insert (data->metrics, st->metric, res); } else { - *w += weight * st->weight; + res->symbols = g_list_prepend (NULL, st->alias); + res->weight = st->classifier->add_result_func (res->weight, weight); } } @@ -410,10 +420,10 @@ statfiles_results_callback (gpointer key, gpointer value, void *arg) struct worker_task *task = (struct worker_task *)arg; struct metric_result *metric_res; struct metric *metric; - double w; + struct statfile_result *res = (struct statfile_result *)value; + GList *cur_symbol; metric_res = g_hash_table_lookup (task->results, (char *)key); - w = *(double *)value; metric = g_hash_table_lookup (task->worker->srv->cfg->metrics, (char *)key); if (metric == NULL) { @@ -426,13 +436,19 @@ statfiles_results_callback (gpointer key, gpointer value, void *arg) metric_res->symbols = g_hash_table_new (g_str_hash, g_str_equal); memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_hash_table_destroy, metric_res->symbols); metric_res->metric = metric; - metric_res->score = w; - g_hash_table_insert (task->results, key, metric_res); + metric_res->score = res->weight; + g_hash_table_insert (task->results, metric->name, metric_res); } else { - metric_res->score += w; + metric_res->score += res->weight; + } + + cur_symbol = g_list_first (res->symbols); + while (cur_symbol) { + msg_debug ("statfiles_results_callback: insert symbol %s to metric %s", (char *)cur_symbol->data, metric->name); + g_hash_table_insert (metric_res->symbols, (char *)cur_symbol->data, GSIZE_TO_POINTER (1)); + cur_symbol = g_list_next (cur_symbol); } - g_hash_table_insert (metric_res->symbols, key, GSIZE_TO_POINTER (1)); } diff --git a/src/protocol.c b/src/protocol.c index 0fb7764c0..3cdc28ef5 100644 --- a/src/protocol.c +++ b/src/protocol.c @@ -401,7 +401,8 @@ show_metric_symbols (gpointer metric_name, gpointer metric_value, void *user_dat cur = g_list_next (cur); } g_list_free (symbols); - outbuf[r++] = '\r'; outbuf[r] = '\n'; + msg_debug ("show_metric_symbols: write symbols line: %s", outbuf); + outbuf[r++] = '\r'; outbuf[r++] = '\n'; bufferevent_write (task->bev, outbuf, r); } diff --git a/src/worker.c b/src/worker.c index 809adddda..bed6e3d91 100644 --- a/src/worker.c +++ b/src/worker.c @@ -230,6 +230,8 @@ accept_socket (int fd, short what, void *arg) new_task->task_pool = memory_pool_new (memory_pool_get_size ()); /* Add destructor for recipients list (it would be better to use anonymous function here */ memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)rcpt_destruct, new_task); + new_task->results = g_hash_table_new (g_str_hash, g_str_equal); + memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)g_hash_table_destroy, new_task->results); worker->srv->stat->connections_count ++; /* Read event */ -- 2.39.5