From: Vsevolod Stakhov
Date: Thu, 15 Nov 2018 19:34:20 +0000 (+0000)
Subject: [Fix] Switch from chi-square to naive for large Fisher value
X-Git-Tag: 1.8.2~23
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=df529a8d06aa25285362175c75c5275b9566b0b1;p=rspamd.git

[Fix] Switch from chi-square to naive for large Fisher value
---

diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 1a28a42cb..2e710522b 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -344,8 +344,24 @@ bayes_classify (struct rspamd_classifier * ctx,
 		return TRUE;
 	}
 
-	h = 1 - inv_chi_square (task, cl.spam_prob, cl.processed_tokens);
-	s = 1 - inv_chi_square (task, cl.ham_prob, cl.processed_tokens);
+	if (cl.spam_prob < -300 && cl.ham_prob < -300) {
+		/* Fisher value is low enough to apply inv_chi_square */
+		h = 1 - inv_chi_square (task, cl.spam_prob, cl.processed_tokens);
+		s = 1 - inv_chi_square (task, cl.ham_prob, cl.processed_tokens);
+	}
+	else {
+		/* Use naive method */
+		if (cl.spam_prob > cl.ham_prob) {
+			s = (1.0 - exp(cl.spam_prob / cl.ham_prob)) /
+					(1.0 + exp(cl.spam_prob / cl.ham_prob));
+			h = 1.0 - s;
+		}
+		else {
+			h = (1.0 - exp(cl.ham_prob / cl.spam_prob)) /
+					(1.0 + exp(cl.ham_prob / cl.spam_prob));
+			s = 1.0 - h;
+		}
+	}
 
 	if (isfinite (s) && isfinite (h)) {
 		final_prob = (s + 1.0 - h) / 2.;