summary refs log tree commit diff stats
path: root/src/classifiers
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2012-10-04 22:14:10 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2012-10-04 22:14:10 +0400
commit 3789849b7b2e617d0a287fe77490b6643f3a6b74 (patch)
tree 8415305aee39a3aad6adbccbc1941a62f3a41cf8 /src/classifiers
parent 14e1129068d55bc8de0618832d4f7d33bb1b0f06 (diff)
download rspamd-3789849b7b2e617d0a287fe77490b6643f3a6b74.tar.gz
rspamd-3789849b7b2e617d0a287fe77490b6643f3a6b74.zip
* Add meta-classification example.
Many changes to the advanced statistics and meta-classification logic. Add an example of complex meta-classification.
Diffstat (limited to 'src/classifiers')
-rw-r--r-- src/classifiers/bayes.c 11
1 file changed, 7 insertions, 4 deletions
diff --git a/src/classifiers/bayes.c b/src/classifiers/bayes.c
index cad963c4b..a80bbe0ba 100644
--- a/src/classifiers/bayes.c
+++ b/src/classifiers/bayes.c
@@ -391,6 +391,7 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
struct statfile *st;
stat_file_t *file;
GList *cur;
+ gboolean skip_labels;
g_assert (pool != NULL);
g_assert (ctx != NULL);
@@ -411,11 +412,14 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
}
}
- cur = call_classifier_pre_callbacks (ctx->cfg, task, FALSE, FALSE, L);
+ cur = call_classifier_pre_callbacks (ctx->cfg, task, TRUE, is_spam, L);
if (cur) {
+ skip_labels = FALSE;
memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur);
}
else {
+ /* Do not try to learn specific statfiles if pre callback returned nil */
+ skip_labels = TRUE;
cur = ctx->cfg->statfiles;
}
@@ -435,7 +439,7 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
while (cur) {
/* Select statfiles to learn */
st = cur->data;
- if (st->is_spam != is_spam) {
+ if (st->is_spam != is_spam || (skip_labels && st->label)) {
cur = g_list_next (cur);
continue;
}
@@ -460,8 +464,6 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
msg_err ("cannot open statfile %s after creation", st->path);
return FALSE;
}
- cur = g_list_next (cur);
- continue;
}
}
data.file = file;
@@ -470,6 +472,7 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
statfile_inc_revision (file);
statfile_pool_unlock_file (pool, data.file);
maybe_write_binlog (ctx->cfg, st, file, input);
+ msg_info ("increase revision for %s", st->path);
cur = g_list_next (cur);
}