about summary refs log tree commit diff stats
path: root/src/libstat/classifiers
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-04-30 13:49:16 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-04-30 13:49:16 +0100
commit f44290c814a4274f31dca274c4fd75471d58aa1b (patch)
tree ed46c769d1494aab85103b55bc8facb7d5339d73 /src/libstat/classifiers
parent 4f38c0991c8aab1b4b0f272d9c697a8753cb0ada (diff)
download rspamd-f44290c814a4274f31dca274c4fd75471d58aa1b.tar.gz
rspamd-f44290c814a4274f31dca274c4fd75471d58aa1b.zip
Fix bayes probability calculations.
Diffstat (limited to 'src/libstat/classifiers')
-rw-r--r-- src/libstat/classifiers/bayes.c 8
1 file changed, 5 insertions, 3 deletions
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 1e516ecbb..b3ffe2254 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -104,7 +104,6 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
}
total_count += res->value;
res->st_runtime->total_hits += res->value;
- res->cl_runtime->processed_tokens ++;
}
}
@@ -116,6 +115,7 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
bayes_spam_prob = (0.5 + spam_prob * total_count) / (1. + total_count);
rt->spam_prob += log (bayes_spam_prob);
rt->ham_prob += log (1. - bayes_spam_prob);
+ res->cl_runtime->processed_tokens ++;
}
return FALSE;
@@ -165,8 +165,10 @@ bayes_classify (struct classifier_ctx * ctx,
s = 1 - inv_chi_square (-2. * rt->ham_prob,
2 * rt->processed_tokens);
final_prob = (s + 1 - h) / 2.;
- msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f",
- task->message_id, rt->ham_prob, h, rt->spam_prob, s);
+ msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f,"
+ " %L tokens processed of %ud total tokens",
+ task->message_id, rt->ham_prob, h, rt->spam_prob, s,
+ rt->processed_tokens, g_tree_nnodes (input));
}
if (rt->processed_tokens > 0 && fabs (final_prob - 0.5) > 0.05) {