diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-11-23 10:04:00 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-11-23 10:04:00 +0000 |
commit | bfeff0ab44bf0062a6ad3083b3becde22f08d4cf (patch) | |
tree | af92cb21f25c3a739dfca82d0140c74b194ff336 /src/libstat/classifiers | |
parent | 62fee4b415f9c4e738f7fd5dea441dabe1244d2d (diff) | |
download | rspamd-bfeff0ab44bf0062a6ad3083b3becde22f08d4cf.tar.gz rspamd-bfeff0ab44bf0062a6ad3083b3becde22f08d4cf.zip |
Some more fixes to OSB algorithm
Diffstat (limited to 'src/libstat/classifiers')
-rw-r--r-- | src/libstat/classifiers/bayes.c | 15 |
1 files changed, 10 insertions, 5 deletions
```diff
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 966d5b458..3d16c05eb 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -94,7 +94,11 @@ struct bayes_task_closure {
 	struct rspamd_task *task;
 };
 
-static const double feature_weight[] = { 0, 3125, 256, 27, 4, 1 };
+/*
+ * Mathematically we use pow(complexity, complexity), where complexity is the
+ * window index
+ */
+static const double feature_weight[] = { 0, 1, 4, 27, 256, 3125, 46656, 823543 };
 
 #define PROB_COMBINE(prob, cnt, weight, assumed) (((weight) * (assumed) + (cnt) * (prob)) / ((weight) + (cnt)))
 /*
@@ -151,11 +155,12 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
 		rt->ham_prob += log (bayes_ham_prob);
 		res->cl_runtime->processed_tokens ++;
 
-		msg_debug_bayes ("token: total_count: %L, spam_count: %L, ham_count: %L,"
-				" spam_prob: %.3f, "
-				"ham_prob: %.3f, bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, "
+		msg_debug_bayes ("token: weight: %f, total_count: %L, "
+				"spam_count: %L, ham_count: %L,"
+				"spam_prob: %.3f, ham_prob: %.3f, "
+				"bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, "
 				"current spam prob: %.3f, current ham prob: %.3f",
-				total_count, spam_count, ham_count,
+				fw, total_count, spam_count, ham_count,
 				spam_prob, ham_prob, bayes_spam_prob, bayes_ham_prob,
 				rt->spam_prob, rt->ham_prob);
 
```