about summary refs log tree commit diff stats
path: root/src/libstat/classifiers
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-06-20 22:17:46 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-06-20 22:17:46 +0100
commit 96d2adcc5b55c6d1d0463bd051495fbf69f9fcc4 (patch)
tree adc45dca2bc9cf085fa0ae447004a18296feec6c /src/libstat/classifiers
parent 877d0b187838af4a8797b30dbaac13f240c6f61d (diff)
download rspamd-96d2adcc5b55c6d1d0463bd051495fbf69f9fcc4.tar.gz
rspamd-96d2adcc5b55c6d1d0463bd051495fbf69f9fcc4.zip
Fix extreme cases in bayes classifier.
Diffstat (limited to 'src/libstat/classifiers')
-rw-r--r-- src/libstat/classifiers/bayes.c 34
1 files changed, 29 insertions, 5 deletions
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index d04341c3c..87cc6e464 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -176,11 +176,35 @@ bayes_classify (struct classifier_ctx * ctx,
2 * rt->processed_tokens);
s = 1 - inv_chi_square (-2. * rt->ham_prob,
2 * rt->processed_tokens);
- final_prob = (s + 1.0 - h) / 2.;
- msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f,"
- " %L tokens processed of %ud total tokens",
- task->message_id, rt->ham_prob, h, rt->spam_prob, s,
- rt->processed_tokens, g_tree_nnodes (input));
+
+ if (isnormal (s) && isnormal (h)) {
+ final_prob = (s + 1.0 - h) / 2.;
+ msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f,"
+ " %L tokens processed of %ud total tokens",
+ task->message_id, rt->ham_prob, h, rt->spam_prob, s,
+ rt->processed_tokens, g_tree_nnodes (input));
+ }
+ else {
+ /*
+ * We have some overflow, hence we need to check which class
+ * is NaN
+ */
+ if (isnormal (h)) {
+ final_prob = 1.0;
+ msg_debug ("<%s> spam class is overflowed, as we have no"
+ " ham samples", task->message_id);
+ }
+ else if (isnormal (s)){
+ final_prob = 0.0;
+ msg_debug ("<%s> spam class is overflowed, as we have no"
+ " spam samples", task->message_id);
+ }
+ else {
+ final_prob = 0.5;
+ msg_warn ("<%s> spam and ham classes are both overflowed",
+ task->message_id);
+ }
+ }
}
if (rt->processed_tokens > 0 && fabs (final_prob - 0.5) > 0.05) {