about | summary | refs | log | tree | commit | diff | stats
path: root/src/libstat/classifiers
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-11-15 19:34:20 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-11-15 19:34:20 +0000
commit: df529a8d06aa25285362175c75c5275b9566b0b1 (patch)
tree: 6e881b535d21484b00061aaba7081325801f5f3d /src/libstat/classifiers
parent: b5d08799fcadfb314682f4fd29bcb44cce56c802 (diff)
download: rspamd-df529a8d06aa25285362175c75c5275b9566b0b1.tar.gz
download: rspamd-df529a8d06aa25285362175c75c5275b9566b0b1.zip
[Fix] Switch from chi-square to naive for large Fisher value
Diffstat (limited to 'src/libstat/classifiers')
-rw-r--r-- src/libstat/classifiers/bayes.c 20
1 file changed, 18 insertions, 2 deletions
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 1a28a42cb..2e710522b 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -344,8 +344,24 @@ bayes_classify (struct rspamd_classifier * ctx,
return TRUE;
}
- h = 1 - inv_chi_square (task, cl.spam_prob, cl.processed_tokens);
- s = 1 - inv_chi_square (task, cl.ham_prob, cl.processed_tokens);
+ if (cl.spam_prob < -300 && cl.ham_prob < -300) {
+ /* Fisher value is low enough to apply inv_chi_square */
+ h = 1 - inv_chi_square (task, cl.spam_prob, cl.processed_tokens);
+ s = 1 - inv_chi_square (task, cl.ham_prob, cl.processed_tokens);
+ }
+ else {
+ /* Use naive method */
+ if (cl.spam_prob > cl.ham_prob) {
+ s = (1.0 - exp(cl.spam_prob / cl.ham_prob)) /
+ (1.0 + exp(cl.spam_prob / cl.ham_prob));
+ h = 1.0 - s;
+ }
+ else {
+ h = (1.0 - exp(cl.ham_prob / cl.spam_prob)) /
+ (1.0 + exp(cl.ham_prob / cl.spam_prob));
+ s = 1.0 - h;
+ }
+ }
if (isfinite (s) && isfinite (h)) {
final_prob = (s + 1.0 - h) / 2.;