source.dussan.org Git - rspamd.git/commitdiff
Some fixes to bayes.
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 28 Jan 2015 15:39:32 +0000 (15:39 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 28 Jan 2015 15:39:32 +0000 (15:39 +0000)
src/libstat/classifiers/bayes.c
src/libstat/stat_process.c

index 979329d4efc9cc62e9ca3f6cd74e57d77dfc17d5..be6c6f5452942f9295196126c03895cf73011595 100644 (file)
@@ -103,6 +103,8 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
                                ham_count += res->value;
                        }
                        total_count += res->value;
+                       res->st_runtime->total_hits += res->value;
+                       res->cl_runtime->processed_tokens ++;
                }
        }
 
@@ -160,6 +162,8 @@ bayes_classify (struct classifier_ctx * ctx,
                s = 1 - inv_chi_square (-2. * rt->ham_prob,
                                2 * rt->processed_tokens);
                final_prob = (s + 1 - h) / 2.;
+               msg_debug ("<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f",
+                               task->message_id, rt->ham_prob, h, rt->spam_prob, s);
        }
 
        if (rt->processed_tokens > 0 && fabs (final_prob - 0.5) > 0.05) {
index b0e8ffdb9cdb3f495ea7f41d205b4d743ff8bb2e..022edde7da2487a5c2b5a6a6a6fd38f613162e9e 100644 (file)
@@ -109,7 +109,6 @@ preprocess_init_stat_token (gpointer k, gpointer v, gpointer d)
 
                        if (st_runtime->backend->process_token (t, res,
                                        st_runtime->backend->ctx)) {
-                               cl_runtime->processed_tokens ++;
 
                                if (cl_runtime->clcf->max_tokens > 0 &&
                                                cl_runtime->processed_tokens > cl_runtime->clcf->max_tokens) {