about | summary | refs | log | tree | commit | diff | stats
path: root/src/libstat/classifiers
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-11-23 10:04:00 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-11-23 10:04:00 +0000
commit: bfeff0ab44bf0062a6ad3083b3becde22f08d4cf (patch)
tree: af92cb21f25c3a739dfca82d0140c74b194ff336 /src/libstat/classifiers
parent: 62fee4b415f9c4e738f7fd5dea441dabe1244d2d (diff)
download: rspamd-bfeff0ab44bf0062a6ad3083b3becde22f08d4cf.tar.gz
          rspamd-bfeff0ab44bf0062a6ad3083b3becde22f08d4cf.zip
Some more fixes to OSB algorithm
Diffstat (limited to 'src/libstat/classifiers')
-rw-r--r-- src/libstat/classifiers/bayes.c | 15
1 file changed, 10 insertions, 5 deletions
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 966d5b458..3d16c05eb 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -94,7 +94,11 @@ struct bayes_task_closure {
struct rspamd_task *task;
};
-static const double feature_weight[] = { 0, 3125, 256, 27, 4, 1 };
+/*
+ * Mathematically we use pow(complexity, complexity), where complexity is the
+ * window index
+ */
+static const double feature_weight[] = { 0, 1, 4, 27, 256, 3125, 46656, 823543 };
#define PROB_COMBINE(prob, cnt, weight, assumed) (((weight) * (assumed) + (cnt) * (prob)) / ((weight) + (cnt)))
/*
@@ -151,11 +155,12 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
rt->ham_prob += log (bayes_ham_prob);
res->cl_runtime->processed_tokens ++;
- msg_debug_bayes ("token: total_count: %L, spam_count: %L, ham_count: %L,"
- " spam_prob: %.3f, "
- "ham_prob: %.3f, bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, "
+ msg_debug_bayes ("token: weight: %f, total_count: %L, "
+ "spam_count: %L, ham_count: %L,"
+ "spam_prob: %.3f, ham_prob: %.3f, "
+ "bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, "
"current spam prob: %.3f, current ham prob: %.3f",
- total_count, spam_count, ham_count,
+ fw, total_count, spam_count, ham_count,
spam_prob, ham_prob,
bayes_spam_prob, bayes_ham_prob,
rt->spam_prob, rt->ham_prob);