source.dussan.org Git - rspamd.git/commitdiff
Fix bayes probability calculations.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 30 Apr 2015 12:49:16 +0000 (13:49 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 30 Apr 2015 12:49:16 +0000 (13:49 +0100)
src/libstat/classifiers/bayes.c

index 1e516ecbb8b1318d093a358f15153c5084657a58..b3ffe225432084369a62701f021fccc8eb97182d 100644 (file)
@@ -104,7 +104,6 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
                        }
                        total_count += res->value;
                        res->st_runtime->total_hits += res->value;
-                       res->cl_runtime->processed_tokens ++;
                }
        }
 
@@ -116,6 +115,7 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
                bayes_spam_prob = (0.5 + spam_prob * total_count) / (1. + total_count);
                rt->spam_prob += log (bayes_spam_prob);
                rt->ham_prob += log (1. - bayes_spam_prob);
+               res->cl_runtime->processed_tokens ++;
        }
 
        return FALSE;
@@ -165,8 +165,10 @@ bayes_classify (struct classifier_ctx * ctx,
                        s = 1 - inv_chi_square (-2. * rt->ham_prob,
                                        2 * rt->processed_tokens);
                        final_prob = (s + 1 - h) / 2.;
-                       msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f",
-                                       task->message_id, rt->ham_prob, h, rt->spam_prob, s);
+                       msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f,"
+                                       " %L tokens processed of %ud total tokens",
+                                       task->message_id, rt->ham_prob, h, rt->spam_prob, s,
+                                       rt->processed_tokens, g_tree_nnodes (input));
                }
 
                if (rt->processed_tokens > 0 && fabs (final_prob - 0.5) > 0.05) {