From f44290c814a4274f31dca274c4fd75471d58aa1b Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Thu, 30 Apr 2015 13:49:16 +0100
Subject: [PATCH] Fix bayes probability calculations.

---
 src/libstat/classifiers/bayes.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 1e516ecbb..b3ffe2254 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -104,7 +104,6 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
 			}
 			total_count += res->value;
 			res->st_runtime->total_hits += res->value;
-			res->cl_runtime->processed_tokens ++;
 		}
 	}
 
@@ -116,6 +115,7 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
 		bayes_spam_prob = (0.5 + spam_prob * total_count) / (1. + total_count);
 		rt->spam_prob += log (bayes_spam_prob);
 		rt->ham_prob += log (1. - bayes_spam_prob);
+		res->cl_runtime->processed_tokens ++;
 	}
 
 	return FALSE;
@@ -165,8 +165,10 @@ bayes_classify (struct classifier_ctx * ctx,
 		s = 1 - inv_chi_square (-2. * rt->ham_prob,
 				2 * rt->processed_tokens);
 		final_prob = (s + 1 - h) / 2.;
-		msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f",
-				task->message_id, rt->ham_prob, h, rt->spam_prob, s);
+		msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f,"
+				" %L tokens processed of %ud total tokens",
+				task->message_id, rt->ham_prob, h, rt->spam_prob, s,
+				rt->processed_tokens, g_tree_nnodes (input));
 	}
 
 	if (rt->processed_tokens > 0 && fabs (final_prob - 0.5) > 0.05) {
-- 
2.39.5