From: Vsevolod Stakhov Date: Wed, 28 Jan 2015 14:41:43 +0000 (+0000) Subject: Fix learning. X-Git-Tag: 0.9.0~820 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=7ab573abf4036d2b1327099ac7c6634a80e455d7;p=rspamd.git Fix learning. --- diff --git a/src/libstat/backends/mmaped_file.c b/src/libstat/backends/mmaped_file.c index d31065376..8965353a1 100644 --- a/src/libstat/backends/mmaped_file.c +++ b/src/libstat/backends/mmaped_file.c @@ -188,13 +188,19 @@ rspamd_mmaped_file_set_block_common (rspamd_mmaped_file_ctx * pool, } /* First try to find block in chain */ if (block->hash1 == h1 && block->hash2 == h2) { + msg_debug ("%s found existing block %ud in chain %ud, value %.2f", + file->filename, + i, + blocknum, + value); block->value = value; return; } /* Check whether we have a free block in chain */ if (block->hash1 == 0 && block->hash2 == 0) { /* Write new block here */ - msg_debug ("found free block %ud in chain %ud, set h1=%ud, h2=%ud", + msg_debug ("%s found free block %ud in chain %ud, set h1=%ud, h2=%ud", + file->filename, i, blocknum, h1, diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 2b1fd629c..cdf45a80a 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -135,7 +135,7 @@ preprocess_init_stat_token (gpointer k, gpointer v, gpointer d) static GList* rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx, struct rspamd_task *task, struct rspamd_tokenizer_runtime *tklist, - lua_State *L, gboolean learn, GError **err) + lua_State *L, gboolean learn, gboolean spam, GError **err) { struct rspamd_classifier_config *clcf; struct rspamd_statfile_config *stcf; @@ -185,6 +185,12 @@ rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx, while (curst != NULL) { stcf = (struct rspamd_statfile_config *)curst->data; + /* On learning skip statfiles that do not belong to class */ + if (learn && (spam != stcf->is_spam)) { + curst = g_list_next (curst); + continue; + } + bk = rspamd_stat_get_backend (stcf->backend); if (bk == NULL) { @@ -229,6 +235,9 @@ rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx, cl_runtime->start_pos = start_pos; cl_runtime->end_pos = end_pos; + msg_debug ("added runtime for %s classifier from %ud to %ud", + clcf->name, start_pos, end_pos); + start_pos = end_pos; /* Next classifier */ @@ -344,8 +353,8 @@ rspamd_stat_classify (struct rspamd_task *task, lua_State *L, GError **err) } /* Initialize classifiers and statfiles runtime */ - if ((cl_runtimes = rspamd_stat_preprocess (st_ctx, task, tklist, L, FALSE, err)) - == NULL) { + if ((cl_runtimes = rspamd_stat_preprocess (st_ctx, task, tklist, L, + FALSE, FALSE, err)) == NULL) { return FALSE; } @@ -474,8 +483,8 @@ rspamd_stat_learn (struct rspamd_task *task, gboolean spam, lua_State *L, } /* Initialize classifiers and statfiles runtime */ - if ((cl_runtimes = rspamd_stat_preprocess (st_ctx, task, tklist, L, TRUE, err)) - == NULL) { + if ((cl_runtimes = rspamd_stat_preprocess (st_ctx, task, tklist, L, + TRUE, spam, err)) == NULL) { return FALSE; } @@ -490,6 +499,8 @@ rspamd_stat_learn (struct rspamd_task *task, gboolean spam, lua_State *L, if (cl_ctx != NULL) { if (cl_run->cl->learn_spam_func (cl_ctx, cl_run->tok->tokens, cl_run, task, spam, err)) { + msg_debug ("learned %s classifier %s", spam ? "spam" : "ham", + cl_run->clcf->name); ret = TRUE; cbdata.classifier_runtimes = cur;