diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-07-23 12:45:28 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-07-23 12:45:28 +0100 |
commit | e0483657ff6cf1adc828ccce457814d61fe90a0d (patch) | |
tree | 5183e4163f40b81b3e7d5f51488d360883782154 /src/classifiers | |
parent | 7962087e808fb824aa3af6d41d02abc92916ba1e (diff) | |
download | rspamd-e0483657ff6cf1adc828ccce457814d61fe90a0d.tar.gz rspamd-e0483657ff6cf1adc828ccce457814d61fe90a0d.zip |
Unify code style.
Diffstat (limited to 'src/classifiers')
-rw-r--r-- | src/classifiers/bayes.c | 322 | ||||
-rw-r--r-- | src/classifiers/classifiers.c | 38 | ||||
-rw-r--r-- | src/classifiers/classifiers.h | 90 | ||||
-rw-r--r-- | src/classifiers/winnow.c | 388 |
4 files changed, 517 insertions, 321 deletions
diff --git a/src/classifiers/bayes.c b/src/classifiers/bayes.c index a8a18f5ff..2d3fca084 100644 --- a/src/classifiers/bayes.c +++ b/src/classifiers/bayes.c @@ -25,13 +25,13 @@ /* * Bayesian classifier */ +#include "binlog.h" +#include "cfg_file.h" #include "classifiers.h" -#include "tokenizers/tokenizers.h" -#include "main.h" #include "filter.h" -#include "cfg_file.h" -#include "binlog.h" #include "lua/lua_common.h" +#include "main.h" +#include "tokenizers/tokenizers.h" #define LOCAL_PROB_DENOM 16.0 @@ -42,56 +42,68 @@ bayes_error_quark (void) } struct bayes_statfile_data { - guint64 hits; - guint64 total_hits; - double value; - struct rspamd_statfile_config *st; - stat_file_t *file; + guint64 hits; + guint64 total_hits; + double value; + struct rspamd_statfile_config *st; + stat_file_t *file; }; struct bayes_callback_data { - statfile_pool_t *pool; - struct classifier_ctx *ctx; - gboolean in_class; - time_t now; - stat_file_t *file; - struct bayes_statfile_data *statfiles; - guint32 statfiles_num; - guint64 total_spam; - guint64 total_ham; - guint64 processed_tokens; - gsize max_tokens; - double spam_probability; - double ham_probability; + statfile_pool_t *pool; + struct classifier_ctx *ctx; + gboolean in_class; + time_t now; + stat_file_t *file; + struct bayes_statfile_data *statfiles; + guint32 statfiles_num; + guint64 total_spam; + guint64 total_ham; + guint64 processed_tokens; + gsize max_tokens; + double spam_probability; + double ham_probability; }; -static gboolean +static gboolean bayes_learn_callback (gpointer key, gpointer value, gpointer data) { - token_node_t *node = key; - struct bayes_callback_data *cd = data; - gint c; - guint64 v; + token_node_t *node = key; + struct bayes_callback_data *cd = data; + gint c; + guint64 v; c = (cd->in_class) ? 1 : -1; /* Consider that not found blocks have value 1 */ - v = statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, cd->now); + v = + statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, + cd->now); if (v == 0 && c > 0) { - statfile_pool_set_block (cd->pool, cd->file, node->h1, node->h2, cd->now, c); - cd->processed_tokens ++; + statfile_pool_set_block (cd->pool, + cd->file, + node->h1, + node->h2, + cd->now, + c); + cd->processed_tokens++; } else if (v != 0) { if (G_LIKELY (c > 0)) { - v ++; + v++; } - else if (c < 0){ + else if (c < 0) { if (v != 0) { - v --; + v--; } } - statfile_pool_set_block (cd->pool, cd->file, node->h1, node->h2, cd->now, v); - cd->processed_tokens ++; + statfile_pool_set_block (cd->pool, + cd->file, + node->h1, + node->h2, + cd->now, + v); + cd->processed_tokens++; } if (cd->max_tokens != 0 && cd->processed_tokens > cd->max_tokens) { @@ -133,7 +145,7 @@ inv_chi_square (gdouble value, gint freedom_deg) return 0; } sum = prob; - for (i = 1; i < freedom_deg / 2; i ++) { + for (i = 1; i < freedom_deg / 2; i++) { prob *= value / (gdouble)i; sum += prob; } @@ -148,16 +160,20 @@ static gboolean bayes_classify_callback (gpointer key, gpointer value, gpointer data) { - token_node_t *node = key; - struct bayes_callback_data *cd = data; - guint i; - struct bayes_statfile_data *cur; - guint64 spam_count = 0, ham_count = 0, total_count = 0; - double spam_prob, spam_freq, ham_freq, bayes_spam_prob; + token_node_t *node = key; + struct bayes_callback_data *cd = data; + guint i; + struct bayes_statfile_data *cur; + guint64 spam_count = 0, ham_count = 0, total_count = 0; + double spam_prob, spam_freq, ham_freq, bayes_spam_prob; - for (i = 0; i < cd->statfiles_num; i ++) { + for (i = 0; i < cd->statfiles_num; i++) { cur = &cd->statfiles[i]; - cur->value = statfile_pool_get_block (cd->pool, cur->file, node->h1, node->h2, cd->now); + cur->value = statfile_pool_get_block (cd->pool, + cur->file, + node->h1, + node->h2, + cd->now); if (cur->value > 0) { cur->total_hits += cur->value; if (cur->st->is_spam) { @@ -178,7 +194,7 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data) bayes_spam_prob = (0.5 + spam_prob * total_count) / (1. + total_count); cd->spam_probability += log (bayes_spam_prob); cd->ham_probability += log (1. - bayes_spam_prob); - cd->processed_tokens ++; + cd->processed_tokens++; } if (cd->max_tokens != 0 && cd->processed_tokens > cd->max_tokens) { @@ -189,10 +205,11 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data) return FALSE; } -struct classifier_ctx* +struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier_config *cfg) { - struct classifier_ctx *ctx = rspamd_mempool_alloc (pool, sizeof (struct classifier_ctx)); + struct classifier_ctx *ctx = + rspamd_mempool_alloc (pool, sizeof (struct classifier_ctx)); ctx->pool = pool; ctx->cfg = cfg; @@ -202,23 +219,28 @@ bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier_config *cfg) } gboolean -bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct rspamd_task *task, lua_State *L) +bayes_classify (struct classifier_ctx * ctx, + statfile_pool_t *pool, + GTree *input, + struct rspamd_task *task, + lua_State *L) { - struct bayes_callback_data data; - gchar *value; - gint nodes, i = 0, selected_st = -1, cnt; - gint minnodes; - guint64 maxhits = 0, rev; - double final_prob, h, s; - struct rspamd_statfile_config *st; - stat_file_t *file; - GList *cur; - char *sumbuf; + struct bayes_callback_data data; + gchar *value; + gint nodes, i = 0, selected_st = -1, cnt; + gint minnodes; + guint64 maxhits = 0, rev; + double final_prob, h, s; + struct rspamd_statfile_config *st; + stat_file_t *file; + GList *cur; + char *sumbuf; g_assert (pool != NULL); g_assert (ctx != NULL); - if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { + if (ctx->cfg->opts && + (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { minnodes = strtol (value, NULL, 10); nodes = g_tree_nnodes (input); if (nodes > FEATURE_WINDOW_SIZE) { @@ -231,7 +253,8 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, cur = call_classifier_pre_callbacks (ctx->cfg, task, FALSE, FALSE, L); if (cur) { - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur); + rspamd_mempool_add_destructor (task->task_pool, + (rspamd_mempool_destruct_t)g_list_free, cur); } else { cur = ctx->cfg->statfiles; @@ -248,7 +271,8 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, data.ham_probability = 0; data.total_ham = 0; data.total_spam = 0; - if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) { + if (ctx->cfg->opts && + (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) { minnodes = rspamd_config_parse_limit (value, -1); data.max_tokens = minnodes; } @@ -260,10 +284,11 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, /* Select statfile to classify */ st = cur->data; if ((file = statfile_pool_is_open (pool, st->path)) == NULL) { - if ((file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { + if ((file = + statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { msg_warn ("cannot open %s", st->path); cur = g_list_next (cur); - data.statfiles_num --; + data.statfiles_num--; continue; } } @@ -278,7 +303,7 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, } cur = g_list_next (cur); - i ++; + i++; } cnt = i; @@ -289,17 +314,19 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, final_prob = 0; } else { - h = 1 - inv_chi_square (-2. * data.spam_probability, 2 * data.processed_tokens); - s = 1 - inv_chi_square (-2. * data.ham_probability, 2 * data.processed_tokens); + h = 1 - inv_chi_square (-2. * data.spam_probability, + 2 * data.processed_tokens); + s = 1 - inv_chi_square (-2. * data.ham_probability, + 2 * data.processed_tokens); final_prob = (s + 1 - h) / 2.; } if (data.processed_tokens > 0 && fabs (final_prob - 0.5) > 0.05) { sumbuf = rspamd_mempool_alloc (task->task_pool, 32); - for (i = 0; i < cnt; i ++) { + for (i = 0; i < cnt; i++) { if ((final_prob > 0.5 && !data.statfiles[i].st->is_spam) || - (final_prob < 0.5 && data.statfiles[i].st->is_spam)) { + (final_prob < 0.5 && data.statfiles[i].st->is_spam)) { continue; } if (data.statfiles[i].total_hits > maxhits) { @@ -308,7 +335,8 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, } } if (selected_st == -1) { - msg_err ("unexpected classifier error: cannot select desired statfile"); + msg_err ( + "unexpected classifier error: cannot select desired statfile"); } else { /* Calculate ham probability correctly */ @@ -317,7 +345,10 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, } rspamd_snprintf (sumbuf, 32, "%.2f%%", final_prob * 100.); cur = g_list_prepend (NULL, sumbuf); - insert_result (task, data.statfiles[selected_st].st->symbol, final_prob, cur); + insert_result (task, + data.statfiles[selected_st].st->symbol, + final_prob, + cur); } } @@ -327,34 +358,44 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, } gboolean -bayes_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symbol, GTree *input, - gboolean in_class, double *sum, double multiplier, GError **err) +bayes_learn (struct classifier_ctx * ctx, + statfile_pool_t *pool, + const char *symbol, + GTree *input, + gboolean in_class, + double *sum, + double multiplier, + GError **err) { - struct bayes_callback_data data; - gchar *value; - gint nodes; - gint minnodes; - struct rspamd_statfile_config *st, *sel_st = NULL; - stat_file_t *to_learn; - GList *cur; + struct bayes_callback_data data; + gchar *value; + gint nodes; + gint minnodes; + struct rspamd_statfile_config *st, *sel_st = NULL; + stat_file_t *to_learn; + GList *cur; g_assert (pool != NULL); g_assert (ctx != NULL); - if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { + if (ctx->cfg->opts && + (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { minnodes = strtol (value, NULL, 10); nodes = g_tree_nnodes (input); if (nodes > FEATURE_WINDOW_SIZE) { nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE; } if (nodes < minnodes) { - msg_info ("do not learn message as it has too few tokens: %d, while %d min", nodes, minnodes); + msg_info ( + "do not learn message as it has too few tokens: %d, while %d min", + nodes, + minnodes); *sum = 0; g_set_error (err, - bayes_error_quark(), /* error domain */ - 1, /* error code */ - "message contains too few tokens: %d, while min is %d", - nodes, (int)minnodes); + bayes_error_quark (), /* error domain */ + 1, /* error code */ + "message contains too few tokens: %d, while min is %d", + nodes, (int)minnodes); return FALSE; } } @@ -365,7 +406,8 @@ bayes_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symb data.ctx = ctx; data.processed_tokens = 0; data.processed_tokens = 0; - if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) { + if (ctx->cfg->opts && + (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) { minnodes = rspamd_config_parse_limit (value, -1); data.max_tokens = minnodes; } @@ -384,31 +426,36 @@ bayes_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symb } if (sel_st == NULL) { g_set_error (err, - bayes_error_quark(), /* error domain */ - 1, /* error code */ - "cannot find statfile for symbol: %s", - symbol); + bayes_error_quark (), /* error domain */ + 1, /* error code */ + "cannot find statfile for symbol: %s", + symbol); return FALSE; } if ((to_learn = statfile_pool_is_open (pool, sel_st->path)) == NULL) { - if ((to_learn = statfile_pool_open (pool, sel_st->path, sel_st->size, FALSE)) == NULL) { + if ((to_learn = + statfile_pool_open (pool, sel_st->path, sel_st->size, + FALSE)) == NULL) { msg_warn ("cannot open %s", sel_st->path); if (statfile_pool_create (pool, sel_st->path, sel_st->size) == -1) { msg_err ("cannot create statfile %s", sel_st->path); g_set_error (err, - bayes_error_quark(), /* error domain */ - 1, /* error code */ - "cannot create statfile: %s", - sel_st->path); + bayes_error_quark (), /* error domain */ + 1, /* error code */ + "cannot create statfile: %s", + sel_st->path); return FALSE; } - if ((to_learn = statfile_pool_open (pool, sel_st->path, sel_st->size, FALSE)) == NULL) { + if ((to_learn = + statfile_pool_open (pool, sel_st->path, sel_st->size, + FALSE)) == NULL) { g_set_error (err, - bayes_error_quark(), /* error domain */ - 1, /* error code */ - "cannot open statfile %s after creation", - sel_st->path); - msg_err ("cannot open statfile %s after creation", sel_st->path); + bayes_error_quark (), /* error domain */ + 1, /* error code */ + "cannot open statfile %s after creation", + sel_st->path); + msg_err ("cannot open statfile %s after creation", + sel_st->path); return FALSE; } } @@ -427,22 +474,28 @@ bayes_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symb } gboolean -bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, - GTree *input, struct rspamd_task *task, gboolean is_spam, lua_State *L, GError **err) +bayes_learn_spam (struct classifier_ctx * ctx, + statfile_pool_t *pool, + GTree *input, + struct rspamd_task *task, + gboolean is_spam, + lua_State *L, + GError **err) { - struct bayes_callback_data data; - gchar *value; - gint nodes; - gint minnodes; - struct rspamd_statfile_config *st; - stat_file_t *file; - GList *cur; - gboolean skip_labels; + struct bayes_callback_data data; + gchar *value; + gint nodes; + gint minnodes; + struct rspamd_statfile_config *st; + stat_file_t *file; + GList *cur; + gboolean skip_labels; g_assert (pool != NULL); g_assert (ctx != NULL); - if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { + if (ctx->cfg->opts && + (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { minnodes = strtol (value, NULL, 10); nodes = g_tree_nnodes (input); if (nodes > FEATURE_WINDOW_SIZE) { @@ -450,10 +503,10 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, } if (nodes < minnodes) { g_set_error (err, - bayes_error_quark(), /* error domain */ - 1, /* error code */ - "message contains too few tokens: %d, while min is %d", - nodes, (int)minnodes); + bayes_error_quark (), /* error domain */ + 1, /* error code */ + "message contains too few tokens: %d, while min is %d", + nodes, (int)minnodes); return FALSE; } } @@ -461,7 +514,8 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, cur = call_classifier_pre_callbacks (ctx->cfg, task, TRUE, is_spam, L); if (cur) { skip_labels = FALSE; - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur); + rspamd_mempool_add_destructor (task->task_pool, + (rspamd_mempool_destruct_t)g_list_free, cur); } else { /* Do not try to learn specific statfiles if pre callback returned nil */ @@ -475,7 +529,8 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, data.in_class = TRUE; data.processed_tokens = 0; - if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) { + if (ctx->cfg->opts && + (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) { minnodes = rspamd_config_parse_limit (value, -1); data.max_tokens = minnodes; } @@ -491,24 +546,28 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, continue; } if ((file = statfile_pool_is_open (pool, st->path)) == NULL) { - if ((file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { + if ((file = + statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { msg_warn ("cannot open %s", st->path); if (statfile_pool_create (pool, st->path, st->size) == -1) { msg_err ("cannot create statfile %s", st->path); g_set_error (err, - bayes_error_quark(), /* error domain */ - 1, /* error code */ - "cannot create statfile: %s", - st->path); + bayes_error_quark (), /* error domain */ + 1, /* error code */ + "cannot create statfile: %s", + st->path); return FALSE; } - if ((file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { + if ((file = + statfile_pool_open (pool, st->path, st->size, + FALSE)) == NULL) { g_set_error (err, - bayes_error_quark(), /* error domain */ - 1, /* error code */ - "cannot open statfile %s after creation", - st->path); - msg_err ("cannot open statfile %s after creation", st->path); + bayes_error_quark (), /* error domain */ + 1, /* error code */ + "cannot open statfile %s after creation", + st->path); + msg_err ("cannot open statfile %s after creation", + st->path); return FALSE; } } @@ -528,7 +587,10 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, } GList * -bayes_weights (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct rspamd_task *task) +bayes_weights (struct classifier_ctx * ctx, + statfile_pool_t *pool, + GTree *input, + struct rspamd_task *task) { /* This function is unimplemented with new normalizer */ return NULL; diff --git a/src/classifiers/classifiers.c b/src/classifiers/classifiers.c index fb294379c..95dd52c44 100644 --- a/src/classifiers/classifiers.c +++ b/src/classifiers/classifiers.c @@ -28,29 +28,29 @@ #include "classifiers.h" -struct classifier classifiers[] = { - { - .name = "winnow", - .init_func = winnow_init, - .classify_func = winnow_classify, - .learn_func = winnow_learn, - .learn_spam_func = winnow_learn_spam, - .weights_func = winnow_weights - }, - { - .name = "bayes", - .init_func = bayes_init, - .classify_func = bayes_classify, - .learn_func = bayes_learn, - .learn_spam_func = bayes_learn_spam, - .weights_func = bayes_weights - } +struct classifier classifiers[] = { + { + .name = "winnow", + .init_func = winnow_init, + .classify_func = winnow_classify, + .learn_func = winnow_learn, + .learn_spam_func = winnow_learn_spam, + .weights_func = winnow_weights + }, + { + .name = "bayes", + .init_func = bayes_init, + .classify_func = bayes_classify, + .learn_func = bayes_learn, + .learn_spam_func = bayes_learn_spam, + .weights_func = bayes_weights + } }; -struct classifier * +struct classifier * get_classifier (const char *name) { - guint i; + guint i; for (i = 0; i < sizeof (classifiers) / sizeof (classifiers[0]); i++) { if (strcmp (classifiers[i].name, name) == 0) { diff --git a/src/classifiers/classifiers.h b/src/classifiers/classifiers.h index 2b36d8c02..8e59fc555 100644 --- a/src/classifiers/classifiers.h +++ b/src/classifiers/classifiers.h @@ -28,36 +28,80 @@ struct classify_weight { /* Common classifier structure */ struct classifier { char *name; - struct classifier_ctx* (*init_func)(rspamd_mempool_t *pool, struct rspamd_classifier_config *cf); - gboolean (*classify_func)(struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct rspamd_task *task, lua_State *L); - gboolean (*learn_func)(struct classifier_ctx* ctx, statfile_pool_t *pool, - const char *symbol, GTree *input, gboolean in_class, - double *sum, double multiplier, GError **err); - gboolean (*learn_spam_func)(struct classifier_ctx* ctx, statfile_pool_t *pool, - GTree *input, struct rspamd_task *task, gboolean is_spam, lua_State *L, GError **err); - GList* (*weights_func)(struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct rspamd_task *task); + struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool, + struct rspamd_classifier_config *cf); + gboolean (*classify_func)(struct classifier_ctx * ctx, + statfile_pool_t *pool, GTree *input, struct rspamd_task *task, + lua_State *L); + gboolean (*learn_func)(struct classifier_ctx * ctx, statfile_pool_t *pool, + const char *symbol, GTree *input, gboolean in_class, + double *sum, double multiplier, GError **err); + gboolean (*learn_spam_func)(struct classifier_ctx * ctx, + statfile_pool_t *pool, + GTree *input, struct rspamd_task *task, gboolean is_spam, lua_State *L, + GError **err); + GList * (*weights_func)(struct classifier_ctx * ctx, statfile_pool_t *pool, + GTree *input, struct rspamd_task *task); }; /* Get classifier structure by name or return NULL if this name is not found */ -struct classifier* get_classifier (const char *name); +struct classifier * get_classifier (const char *name); /* Winnow algorithm */ -struct classifier_ctx* winnow_init (rspamd_mempool_t *pool, struct rspamd_classifier_config *cf); -gboolean winnow_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct rspamd_task *task, lua_State *L); -gboolean winnow_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symbol, GTree *input, - gboolean in_class, double *sum, double multiplier, GError **err); -gboolean winnow_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, - GTree *input, struct rspamd_task *task, gboolean is_spam, lua_State *L, GError **err); -GList *winnow_weights (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct rspamd_task *task); +struct classifier_ctx * winnow_init (rspamd_mempool_t *pool, + struct rspamd_classifier_config *cf); +gboolean winnow_classify (struct classifier_ctx * ctx, + statfile_pool_t *pool, + GTree *input, + struct rspamd_task *task, + lua_State *L); +gboolean winnow_learn (struct classifier_ctx * ctx, + statfile_pool_t *pool, + const char *symbol, + GTree *input, + gboolean in_class, + double *sum, + double multiplier, + GError **err); +gboolean winnow_learn_spam (struct classifier_ctx * ctx, + statfile_pool_t *pool, + GTree *input, + struct rspamd_task *task, + gboolean is_spam, + lua_State *L, + GError **err); +GList * winnow_weights (struct classifier_ctx * ctx, + statfile_pool_t *pool, + GTree *input, + struct rspamd_task *task); /* Bayes algorithm */ -struct classifier_ctx* bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier_config *cf); -gboolean bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct rspamd_task *task, lua_State *L); -gboolean bayes_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symbol, GTree *input, - gboolean in_class, double *sum, double multiplier, GError **err); -gboolean bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, - GTree *input, struct rspamd_task *task, gboolean is_spam, lua_State *L, GError **err); -GList *bayes_weights (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct rspamd_task *task); +struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, + struct rspamd_classifier_config *cf); +gboolean bayes_classify (struct classifier_ctx * ctx, + statfile_pool_t *pool, + GTree *input, + struct rspamd_task *task, + lua_State *L); +gboolean bayes_learn (struct classifier_ctx * ctx, + statfile_pool_t *pool, + const char *symbol, + GTree *input, + gboolean in_class, + double *sum, + double multiplier, + GError **err); +gboolean bayes_learn_spam (struct classifier_ctx * ctx, + statfile_pool_t *pool, + GTree *input, + struct rspamd_task *task, + gboolean is_spam, + lua_State *L, + GError **err); +GList * bayes_weights (struct classifier_ctx * ctx, + statfile_pool_t *pool, + GTree *input, + struct rspamd_task *task); /* Array of all defined classifiers */ extern struct classifier classifiers[]; diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c index 1fe8f16d2..85d8cfa20 100644 --- a/src/classifiers/winnow.c +++ b/src/classifiers/winnow.c @@ -26,12 +26,12 @@ * Winnow classifier */ +#include "cfg_file.h" #include "classifiers.h" -#include "tokenizers/tokenizers.h" -#include "main.h" #include "filter.h" -#include "cfg_file.h" #include "lua/lua_common.h" +#include "main.h" +#include "tokenizers/tokenizers.h" #define WINNOW_PROMOTION 1.23 #define WINNOW_DEMOTION 0.83 @@ -51,40 +51,42 @@ winnow_error_quark (void) } struct winnow_callback_data { - statfile_pool_t *pool; - struct classifier_ctx *ctx; - stat_file_t *file; - stat_file_t *learn_file; - long double sum; - long double start; - double multiplier; - guint32 count; - guint32 new_blocks; - gboolean in_class; - gboolean do_demote; - gboolean fresh_run; - time_t now; + statfile_pool_t *pool; + struct classifier_ctx *ctx; + stat_file_t *file; + stat_file_t *learn_file; + long double sum; + long double start; + double multiplier; + guint32 count; + guint32 new_blocks; + gboolean in_class; + gboolean do_demote; + gboolean fresh_run; + time_t now; }; static const double max_common_weight = MAX_WEIGHT * WINNOW_DEMOTION; -static gboolean +static gboolean winnow_classify_callback (gpointer key, gpointer value, gpointer data) { - token_node_t *node = key; - struct winnow_callback_data *cd = data; - double v; + token_node_t *node = key; + struct winnow_callback_data *cd = data; + double v; /* Consider that not found blocks have value 1 */ - v = statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, cd->now); + v = + statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, + cd->now); if (fabs (v) > ALPHA) { cd->sum += v; } else { cd->sum += 1.0; - cd->new_blocks ++; + cd->new_blocks++; } cd->count++; @@ -92,24 +94,32 @@ winnow_classify_callback (gpointer key, gpointer value, gpointer data) return FALSE; } -static gboolean +static gboolean winnow_learn_callback (gpointer key, gpointer value, gpointer data) { - token_node_t *node = key; - struct winnow_callback_data *cd = data; - double v, c; - - c = (cd->in_class) ? WINNOW_PROMOTION * cd->multiplier : WINNOW_DEMOTION / cd->multiplier; + token_node_t *node = key; + struct winnow_callback_data *cd = data; + double v, c; + + c = (cd->in_class) ? WINNOW_PROMOTION * cd->multiplier : WINNOW_DEMOTION / + cd->multiplier; /* Consider that not found blocks have value 1 */ - v = statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, cd->now); + v = + statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, + cd->now); if (fabs (v) < ALPHA) { /* Block not found, insert new */ cd->start += 1; if (cd->file == cd->learn_file) { - statfile_pool_set_block (cd->pool, cd->file, node->h1, node->h2, cd->now, c); + statfile_pool_set_block (cd->pool, + cd->file, + node->h1, + node->h2, + cd->now, + c); node->value = c; - cd->new_blocks ++; + cd->new_blocks++; } } else { @@ -119,18 +129,23 @@ winnow_learn_callback (gpointer key, gpointer value, gpointer data) node->extra = 0; } else { - node->extra ++; + node->extra++; } node->value = v; - + if (node->extra > 1) { - /* + /* * Assume that this node is common for several statfiles, so * decrease its weight proportianally */ if (node->value > max_common_weight) { /* Static fluctuation */ - statfile_pool_set_block (cd->pool, cd->file, node->h1, node->h2, cd->now, 0.); + statfile_pool_set_block (cd->pool, + cd->file, + node->h1, + node->h2, + cd->now, + 0.); node->value = 0.; } else if (node->value > WINNOW_PROMOTION * cd->multiplier) { @@ -141,7 +156,7 @@ winnow_learn_callback (gpointer key, gpointer value, gpointer data) node->value *= c; } else { - /* + /* * Too high token value that exists also in other * statfiles, may be statistic error, so decrease it * slightly @@ -152,8 +167,13 @@ winnow_learn_callback (gpointer key, gpointer value, gpointer data) else { node->value = WINNOW_DEMOTION / cd->multiplier; } - statfile_pool_set_block (cd->pool, cd->file, node->h1, node->h2, cd->now, node->value); - } + statfile_pool_set_block (cd->pool, + cd->file, + node->h1, + node->h2, + cd->now, + node->value); + } } else if (cd->file == cd->learn_file) { /* New block or block that is in only one statfile */ @@ -164,12 +184,22 @@ winnow_learn_callback (gpointer key, gpointer value, gpointer data) else { node->value *= c; } - statfile_pool_set_block (cd->pool, cd->file, node->h1, node->h2, cd->now, node->value); + statfile_pool_set_block (cd->pool, + cd->file, + node->h1, + node->h2, + cd->now, + node->value); } else if (cd->do_demote) { /* Demote blocks in file */ node->value *= WINNOW_DEMOTION / cd->multiplier; - statfile_pool_set_block (cd->pool, cd->file, node->h1, node->h2, cd->now, node->value); + statfile_pool_set_block (cd->pool, + cd->file, + node->h1, + node->h2, + cd->now, + node->value); } } @@ -181,10 +211,11 @@ winnow_learn_callback (gpointer key, gpointer value, gpointer data) return FALSE; } -struct classifier_ctx * +struct classifier_ctx * winnow_init (rspamd_mempool_t * pool, struct rspamd_classifier_config *cfg) { - struct classifier_ctx *ctx = rspamd_mempool_alloc (pool, sizeof (struct classifier_ctx)); + struct classifier_ctx *ctx = + rspamd_mempool_alloc (pool, sizeof (struct classifier_ctx)); ctx->pool = pool; ctx->cfg = cfg; @@ -193,14 +224,18 @@ winnow_init (rspamd_mempool_t * pool, struct rspamd_classifier_config *cfg) } gboolean -winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * input, struct rspamd_task *task, lua_State *L) +winnow_classify (struct classifier_ctx *ctx, + statfile_pool_t * pool, + GTree * input, + struct rspamd_task *task, + lua_State *L) { - struct winnow_callback_data data; - char *sumbuf, *value; - long double res = 0., max = 0.; - GList *cur; - struct rspamd_statfile_config *st, *sel = NULL; - int nodes, minnodes; + struct winnow_callback_data data; + char *sumbuf, *value; + long double res = 0., max = 0.; + GList *cur; + struct rspamd_statfile_config *st, *sel = NULL; + int nodes, minnodes; g_assert (pool != NULL); g_assert (ctx != NULL); @@ -208,22 +243,27 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp data.pool = pool; data.now = time (NULL); data.ctx = ctx; - - if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { + + if (ctx->cfg->opts && + (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { minnodes = strtol (value, NULL, 10); nodes = g_tree_nnodes (input); if (nodes > FEATURE_WINDOW_SIZE) { nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE; } if (nodes < minnodes) { - msg_info ("do not classify message as it has too few tokens: %d, while %d min", nodes, minnodes); + msg_info ( + "do not classify message as it has too few tokens: %d, while %d min", + nodes, + minnodes); return FALSE; } } cur = call_classifier_pre_callbacks (ctx->cfg, task, FALSE, FALSE, L); if (cur) { - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur); + rspamd_mempool_add_destructor (task->task_pool, + (rspamd_mempool_destruct_t)g_list_free, cur); } else { cur = ctx->cfg->statfiles; @@ -235,7 +275,8 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp data.count = 0; data.new_blocks = 0; if ((data.file = statfile_pool_is_open (pool, st->path)) == NULL) { - if ((data.file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { + if ((data.file = + statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { msg_warn ("cannot open %s, skip it", st->path); cur = g_list_next (cur); continue; @@ -261,16 +302,16 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp if (sel != NULL) { #ifdef WITH_LUA - max = call_classifier_post_callbacks (ctx->cfg, task, max, L); + max = call_classifier_post_callbacks (ctx->cfg, task, max, L); #endif #ifdef HAVE_TANHL - max = tanhl (max); + max = tanhl (max); #else - /* - * As some implementations of libm does not support tanhl, try to use - * tanh - */ - max = tanh ((double) max); + /* + * As some implementations of libm does not support tanhl, try to use + * tanh + */ + max = tanh ((double) max); #endif sumbuf = rspamd_mempool_alloc (task->task_pool, 32); rspamd_snprintf (sumbuf, 32, "%.2F", max); @@ -282,15 +323,18 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp } GList * -winnow_weights (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * input, struct rspamd_task *task) +winnow_weights (struct classifier_ctx *ctx, + statfile_pool_t * pool, + GTree * input, + struct rspamd_task *task) { - struct winnow_callback_data data; - long double res = 0.; - GList *cur, *resl = NULL; - struct rspamd_statfile_config *st; - struct classify_weight *w; - char *value; - int nodes, minnodes; + struct winnow_callback_data data; + long double res = 0.; + GList *cur, *resl = NULL; + struct rspamd_statfile_config *st; + struct classify_weight *w; + char *value; + int nodes, minnodes; g_assert (pool != NULL); g_assert (ctx != NULL); @@ -299,25 +343,30 @@ winnow_weights (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inpu data.now = time (NULL); data.ctx = ctx; - if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { + if (ctx->cfg->opts && + (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { minnodes = strtol (value, NULL, 10); nodes = g_tree_nnodes (input); if (nodes > FEATURE_WINDOW_SIZE) { nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE; } if (nodes < minnodes) { - msg_info ("do not classify message as it has too few tokens: %d, while %d min", nodes, minnodes); + msg_info ( + "do not classify message as it has too few tokens: %d, while %d min", + nodes, + minnodes); return NULL; } } - + cur = ctx->cfg->statfiles; while (cur) { st = cur->data; data.sum = 0; data.count = 0; if ((data.file = statfile_pool_is_open (pool, st->path)) == NULL) { - if ((data.file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { + if ((data.file = + statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { msg_warn ("cannot open %s, skip it", st->path); cur = g_list_next (cur); continue; @@ -328,7 +377,9 @@ winnow_weights (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inpu g_tree_foreach (input, winnow_classify_callback, &data); } - w = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct classify_weight)); + w = + rspamd_mempool_alloc0 (task->task_pool, + sizeof (struct classify_weight)); if (data.count != 0) { res = data.sum / (double)data.count; } @@ -340,9 +391,10 @@ winnow_weights (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inpu resl = g_list_prepend (resl, w); cur = g_list_next (cur); } - + if (resl != NULL) { - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, resl); + rspamd_mempool_add_destructor (task->task_pool, + (rspamd_mempool_destruct_t)g_list_free, resl); } return resl; @@ -351,21 +403,27 @@ winnow_weights (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inpu gboolean -winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *symbol, - GTree * input, int in_class, double *sum, double multiplier, GError **err) +winnow_learn (struct classifier_ctx *ctx, + statfile_pool_t *pool, + const char *symbol, + GTree * input, + int in_class, + double *sum, + double multiplier, + GError **err) { - struct winnow_callback_data data = { + struct winnow_callback_data data = { .file = NULL, .multiplier = multiplier }; - char *value; - int nodes, minnodes, iterations = 0; - struct rspamd_statfile_config *st, *sel_st = NULL; - stat_file_t *sel = NULL, *to_learn; - long double res = 0., max = 0., start_value = 0., end_value = 0.; - double learn_threshold = 0.0; - GList *cur, *to_demote = NULL; - gboolean force_learn = FALSE; + char *value; + int nodes, minnodes, iterations = 0; + struct rspamd_statfile_config *st, *sel_st = NULL; + stat_file_t *sel = NULL, *to_learn; + long double res = 0., max = 0., start_value = 0., end_value = 0.; + double learn_threshold = 0.0; + GList *cur, *to_demote = NULL; + gboolean force_learn = FALSE; g_assert (pool != NULL); g_assert (ctx != NULL); @@ -376,29 +434,35 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym data.ctx = ctx; - if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { + if (ctx->cfg->opts && + (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { minnodes = strtol (value, NULL, 10); nodes = g_tree_nnodes (input); if (nodes > FEATURE_WINDOW_SIZE) { nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE; } if (nodes < minnodes) { - msg_info ("do not learn message as it has too few tokens: %d, while %d min", nodes, minnodes); + msg_info ( + "do not learn message as it has too few tokens: %d, while %d min", + nodes, + minnodes); if (sum != NULL) { *sum = 0; } g_set_error (err, - winnow_error_quark(), /* error domain */ - 1, /* error code */ - "message contains too few tokens: %d, while min is %d", - nodes, minnodes); + winnow_error_quark (), /* error domain */ + 1, /* error code */ + "message contains too few tokens: %d, while min is %d", + nodes, minnodes); return FALSE; } } - if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "learn_threshold")) != NULL) { + if (ctx->cfg->opts && + (value = + g_hash_table_lookup (ctx->cfg->opts, "learn_threshold")) != NULL) { learn_threshold = strtod (value, NULL); } - + if (learn_threshold <= 1.0 && learn_threshold >= 0) { /* Classify message and check target statfile score */ cur = ctx->cfg->statfiles; @@ -406,24 +470,27 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym /* Open or create all statfiles inside classifier */ st = cur->data; if (statfile_pool_is_open (pool, st->path) == NULL) { - if (statfile_pool_open (pool, st->path, st->size, FALSE) == NULL) { + if (statfile_pool_open (pool, st->path, st->size, + FALSE) == NULL) { msg_warn ("cannot open %s", st->path); if (statfile_pool_create (pool, st->path, st->size) == -1) { msg_err ("cannot create statfile %s", st->path); g_set_error (err, - winnow_error_quark(), /* error domain */ - 1, /* error code */ - "cannot create statfile: %s", - st->path); + winnow_error_quark (), /* error domain */ + 1, /* error code */ + "cannot create statfile: %s", + st->path); return FALSE; } - if (statfile_pool_open (pool, st->path, st->size, FALSE) == NULL) { + if (statfile_pool_open (pool, st->path, st->size, + FALSE) == NULL) { g_set_error (err, - winnow_error_quark(), /* error domain */ - 1, /* error code */ - "open statfile %s after creation", - st->path); - msg_err ("cannot open statfile %s after creation", st->path); + winnow_error_quark (), /* error domain */ + 1, /* error code */ + "open statfile %s after creation", + st->path); + msg_err ("cannot open statfile %s after creation", + st->path); return FALSE; } } @@ -437,10 +504,10 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym if (sel_st == NULL) { g_set_error (err, - winnow_error_quark(), /* error domain */ - 1, /* error code */ - "cannot find statfile for symbol %s", - symbol); + winnow_error_quark (), /* error domain */ + 1, /* error code */ + "cannot find statfile for symbol %s", + symbol); msg_err ("cannot find statfile for symbol %s", symbol); return FALSE; } @@ -448,10 +515,10 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym to_learn = statfile_pool_is_open (pool, sel_st->path); if (to_learn == NULL) { g_set_error (err, - winnow_error_quark(), /* error domain */ - 1, /* error code */ - "statfile %s is not opened this maybe if your statfile pool is too small to handle all statfiles", - sel_st->path); + winnow_error_quark (), /* error domain */ + 1, /* error code */ + "statfile %s is not opened this maybe if your statfile pool is too small to handle all statfiles", + sel_st->path); return FALSE; } /* Check target statfile */ @@ -477,10 +544,10 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym data.count = 0; if ((data.file = statfile_pool_is_open (pool, st->path)) == NULL) { g_set_error (err, - winnow_error_quark(), /* error domain */ - 1, /* error code */ - "statfile %s is not opened this maybe if your statfile pool is too small to handle all statfiles", - st->path); + winnow_error_quark (), /* error domain */ + 1, /* error code */ + "statfile %s is not opened this maybe if your statfile pool is too small to handle all statfiles", + st->path); return FALSE; } g_tree_foreach (input, winnow_classify_callback, &data); @@ -498,18 +565,22 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym } } else { - msg_err ("learn threshold is more than 1 or less than 0, so cannot do learn, please check your configuration"); + msg_err ( + "learn threshold is more than 1 or less than 0, so cannot do learn, please check your configuration"); g_set_error (err, - winnow_error_quark(), /* error domain */ - 1, /* error code */ - "bad learn_threshold setting: %.2f", - learn_threshold); + winnow_error_quark (), /* error domain */ + 1, /* error code */ + "bad learn_threshold setting: %.2f", + learn_threshold); return FALSE; } /* If to_demote list is empty this message is already classified correctly */ if (max > WINNOW_PROMOTION && to_demote == NULL && !force_learn) { - msg_info ("this message is already of class %s with threshold %.2f and weight %.2F", - sel_st->symbol, learn_threshold, max); + msg_info ( + "this message is already of class %s with threshold %.2f and weight %.2F", + sel_st->symbol, + learn_threshold, + max); goto end; } data.learn_file = to_learn; @@ -526,7 +597,8 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym if ((data.file = statfile_pool_is_open (pool, st->path)) == NULL) { return FALSE; } - if (to_demote != NULL && g_list_find (to_demote, data.file) != NULL) { + if (to_demote != NULL && + g_list_find (to_demote, data.file) != NULL) { data.do_demote = TRUE; } else { @@ -557,48 +629,66 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym } data.multiplier *= WINNOW_PROMOTION; - msg_info ("learn iteration %d for statfile %s: %G -> %G, multiplier: %.2f", iterations + 1, symbol, - start_value, end_value, data.multiplier); - } while ((in_class ? sel != to_learn : sel == to_learn) && iterations ++ < MAX_LEARN_ITERATIONS); - + msg_info ( + "learn iteration %d for statfile %s: %G -> %G, multiplier: %.2f", + iterations + 1, + symbol, + start_value, + end_value, + data.multiplier); + } while ((in_class ? sel != to_learn : sel == + to_learn) && iterations++ < MAX_LEARN_ITERATIONS); + if (iterations >= MAX_LEARN_ITERATIONS) { - msg_warn ("learning statfile %s was not fully successfull: iterations count is limited to %d, final sum is %G", - sel_st->symbol, MAX_LEARN_ITERATIONS, max); + msg_warn ( + "learning statfile %s was not fully successfull: iterations count is limited to %d, final sum is %G", + sel_st->symbol, + MAX_LEARN_ITERATIONS, + max); g_set_error (err, - winnow_error_quark(), /* error domain */ - 1, /* error code */ - "learning statfile %s was not fully successfull: iterations count is limited to %d", - sel_st->symbol, MAX_LEARN_ITERATIONS); + winnow_error_quark (), /* error domain */ + 1, /* error code */ + "learning statfile %s was not fully successfull: iterations count is limited to %d", + sel_st->symbol, MAX_LEARN_ITERATIONS); return FALSE; } else { - msg_info ("learned statfile %s successfully with %d iterations and sum %G", sel_st->symbol, iterations + 1, max); + msg_info ( + "learned statfile %s successfully with %d iterations and sum %G", + sel_st->symbol, + iterations + 1, + max); } end: if (sum) { #ifdef HAVE_TANHL - *sum = (double)tanhl (max); + *sum = (double)tanhl (max); #else - /* - * As some implementations of libm does not support tanhl, try to use - * tanh - */ - *sum = tanh ((double) max); + /* + * As some implementations of libm does not support tanhl, try to use + * tanh + */ + *sum = tanh ((double) max); #endif } return TRUE; } gboolean -winnow_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, - GTree *input, struct rspamd_task *task, gboolean is_spam, lua_State *L, GError **err) +winnow_learn_spam (struct classifier_ctx * ctx, + statfile_pool_t *pool, + GTree *input, + struct rspamd_task *task, + gboolean is_spam, + lua_State *L, + GError **err) { g_set_error (err, - winnow_error_quark(), /* error domain */ - 1, /* error code */ - "learn spam is not supported for winnow" - ); + winnow_error_quark (), /* error domain */ + 1, /* error code */ + "learn spam is not supported for winnow" + ); return FALSE; } |