source.dussan.org Git - rspamd.git/commitdiff
[Fix] Switch from chi-square to naive for large Fisher value
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 15 Nov 2018 19:34:20 +0000 (19:34 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 15 Nov 2018 19:34:20 +0000 (19:34 +0000)
src/libstat/classifiers/bayes.c

index 1a28a42cba6a0e5995890ca44902ea78812fd637..2e710522bcd4d903ec09942266ba9923ea294d9a 100644 (file)
@@ -344,8 +344,24 @@ bayes_classify (struct rspamd_classifier * ctx,
                return TRUE;
        }
 
-       h = 1 - inv_chi_square (task, cl.spam_prob, cl.processed_tokens);
-       s = 1 - inv_chi_square (task, cl.ham_prob, cl.processed_tokens);
+       if (cl.spam_prob < -300 && cl.ham_prob < -300) {
+               /* Fisher value is low enough to apply inv_chi_square */
+               h = 1 - inv_chi_square (task, cl.spam_prob, cl.processed_tokens);
+               s = 1 - inv_chi_square (task, cl.ham_prob, cl.processed_tokens);
+       }
+       else {
+               /* Use naive method */
+               if (cl.spam_prob > cl.ham_prob) {
+                       s = (1.0 - exp(cl.spam_prob / cl.ham_prob)) /
+                                       (1.0 + exp(cl.spam_prob / cl.ham_prob));
+                       h = 1.0 - s;
+               }
+               else {
+                       h = (1.0 - exp(cl.ham_prob / cl.spam_prob)) /
+                               (1.0 + exp(cl.ham_prob / cl.spam_prob));
+                       s = 1.0 - h;
+               }
+       }
 
        if (isfinite (s) && isfinite (h)) {
                final_prob = (s + 1.0 - h) / 2.;