diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-03-02 17:35:34 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-03-02 17:35:34 +0000 |
commit | 3053127c4119f522c60a58192c58a7775acd7577 (patch) | |
tree | 5a352634eab784c95d0a1b532583f741e84f04d7 /src/libstat/classifiers | |
parent | ac1748b066ce20567a83de6352376963e7563af1 (diff) | |
download | rspamd-3053127c4119f522c60a58192c58a7775acd7577.tar.gz rspamd-3053127c4119f522c60a58192c58a7775acd7577.zip |
Start splitting statistic processing to separate stages.
Diffstat (limited to 'src/libstat/classifiers')
-rw-r--r-- | src/libstat/classifiers/bayes.c | 82 |
1 files changed, 43 insertions, 39 deletions
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 7932ceb9e..823f5eff9 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -151,55 +151,59 @@ bayes_classify (struct classifier_ctx * ctx,
 	g_assert (rt != NULL);
 	g_assert (rt->end_pos > rt->start_pos);
 
-	g_tree_foreach (input, bayes_classify_callback, rt);
-
-	if (rt->spam_prob == 0) {
-		final_prob = 0;
+	if (rt->stage == RSPAMD_STAT_STAGE_PRE) {
+		g_tree_foreach (input, bayes_classify_callback, rt);
 	}
 	else {
-		h = 1 - inv_chi_square (-2. * rt->spam_prob,
-				2 * rt->processed_tokens);
-		s = 1 - inv_chi_square (-2. * rt->ham_prob,
-				2 * rt->processed_tokens);
-		final_prob = (s + 1 - h) / 2.;
-		msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f",
-				task->message_id, rt->ham_prob, h, rt->spam_prob, s);
-	}
 
-	if (rt->processed_tokens > 0 && fabs (final_prob - 0.5) > 0.05) {
+		if (rt->spam_prob == 0) {
+			final_prob = 0;
+		}
+		else {
+			h = 1 - inv_chi_square (-2. * rt->spam_prob,
+					2 * rt->processed_tokens);
+			s = 1 - inv_chi_square (-2. * rt->ham_prob,
+					2 * rt->processed_tokens);
+			final_prob = (s + 1 - h) / 2.;
+			msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f",
+					task->message_id, rt->ham_prob, h, rt->spam_prob, s);
+		}
+
+		if (rt->processed_tokens > 0 && fabs (final_prob - 0.5) > 0.05) {
 
-		sumbuf = rspamd_mempool_alloc (task->task_pool, 32);
-		cur = g_list_first (rt->st_runtime);
+			sumbuf = rspamd_mempool_alloc (task->task_pool, 32);
+			cur = g_list_first (rt->st_runtime);
 
-		while (cur) {
-			st = (struct rspamd_statfile_runtime *)cur->data;
+			while (cur) {
+				st = (struct rspamd_statfile_runtime *)cur->data;
 
-			if ((final_prob < 0.5 && !st->st->is_spam) ||
-				(final_prob > 0.5 && st->st->is_spam)) {
-				if (st->total_hits > maxhits) {
-					maxhits = st->total_hits;
-					selected_st = st;
+				if ((final_prob < 0.5 && !st->st->is_spam) ||
+					(final_prob > 0.5 && st->st->is_spam)) {
+					if (st->total_hits > maxhits) {
+						maxhits = st->total_hits;
+						selected_st = st;
+					}
 				}
-			}
 
-			cur = g_list_next (cur);
-		}
+				cur = g_list_next (cur);
+			}
 
-		if (selected_st == NULL) {
-			msg_err (
-				"unexpected classifier error: cannot select desired statfile");
-		}
-		else {
-			/* Calculate ham probability correctly */
-			if (final_prob < 0.5) {
-				final_prob = 1. - final_prob;
+			if (selected_st == NULL) {
+				msg_err (
+					"unexpected classifier error: cannot select desired statfile");
+			}
+			else {
+				/* Calculate ham probability correctly */
+				if (final_prob < 0.5) {
+					final_prob = 1. - final_prob;
+				}
+				rspamd_snprintf (sumbuf, 32, "%.2f%%", final_prob * 100.);
+				cur = g_list_prepend (NULL, sumbuf);
+				rspamd_task_insert_result (task,
+					selected_st->st->symbol,
+					final_prob,
+					cur);
 			}
-			rspamd_snprintf (sumbuf, 32, "%.2f%%", final_prob * 100.);
-			cur = g_list_prepend (NULL, sumbuf);
-			rspamd_task_insert_result (task,
-				selected_st->st->symbol,
-				final_prob,
-				cur);
 		}
 	}
 