diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-04-30 13:49:16 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-04-30 13:49:16 +0100 |
commit | f44290c814a4274f31dca274c4fd75471d58aa1b (patch) | |
tree | ed46c769d1494aab85103b55bc8facb7d5339d73 /src/libstat/classifiers | |
parent | 4f38c0991c8aab1b4b0f272d9c697a8753cb0ada (diff) | |
download | rspamd-f44290c814a4274f31dca274c4fd75471d58aa1b.tar.gz rspamd-f44290c814a4274f31dca274c4fd75471d58aa1b.zip |
Fix bayes probability calculations.
Diffstat (limited to 'src/libstat/classifiers')
-rw-r--r-- | src/libstat/classifiers/bayes.c | 8 |
1 files changed, 5 insertions, 3 deletions
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 1e516ecbb..b3ffe2254 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -104,7 +104,6 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
 }
 total_count += res->value;
 res->st_runtime->total_hits += res->value;
- res->cl_runtime->processed_tokens ++;
 }
 }
 
@@ -116,6 +115,7 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
 bayes_spam_prob = (0.5 + spam_prob * total_count) / (1. + total_count);
 rt->spam_prob += log (bayes_spam_prob);
 rt->ham_prob += log (1. - bayes_spam_prob);
+ res->cl_runtime->processed_tokens ++;
 }
 
 return FALSE;
@@ -165,8 +165,10 @@ bayes_classify (struct classifier_ctx * ctx,
 s = 1 - inv_chi_square (-2. * rt->ham_prob, 2 * rt->processed_tokens);
 final_prob = (s + 1 - h) / 2.;
 
- msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f",
- task->message_id, rt->ham_prob, h, rt->spam_prob, s);
+ msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f,"
+ " %L tokens processed of %ud total tokens",
+ task->message_id, rt->ham_prob, h, rt->spam_prob, s,
+ rt->processed_tokens, g_tree_nnodes (input));
 }
 
 if (rt->processed_tokens > 0 && fabs (final_prob - 0.5) > 0.05) {