diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-15 19:34:20 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-15 19:34:20 +0000 |
commit | df529a8d06aa25285362175c75c5275b9566b0b1 (patch) | |
tree | 6e881b535d21484b00061aaba7081325801f5f3d /src/libstat/classifiers | |
parent | b5d08799fcadfb314682f4fd29bcb44cce56c802 (diff) | |
download | rspamd-df529a8d06aa25285362175c75c5275b9566b0b1.tar.gz rspamd-df529a8d06aa25285362175c75c5275b9566b0b1.zip |
[Fix] Switch from chi-square to naive for large Fisher value
Diffstat (limited to 'src/libstat/classifiers')
-rw-r--r-- | src/libstat/classifiers/bayes.c | 20 |
1 file changed, 18 insertions, 2 deletions
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index 1a28a42cb..2e710522b 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -344,8 +344,24 @@ bayes_classify (struct rspamd_classifier * ctx, return TRUE; } - h = 1 - inv_chi_square (task, cl.spam_prob, cl.processed_tokens); - s = 1 - inv_chi_square (task, cl.ham_prob, cl.processed_tokens); + if (cl.spam_prob < -300 && cl.ham_prob < -300) { + /* Fisher value is low enough to apply inv_chi_square */ + h = 1 - inv_chi_square (task, cl.spam_prob, cl.processed_tokens); + s = 1 - inv_chi_square (task, cl.ham_prob, cl.processed_tokens); + } + else { + /* Use naive method */ + if (cl.spam_prob > cl.ham_prob) { + s = (1.0 - exp(cl.spam_prob / cl.ham_prob)) / + (1.0 + exp(cl.spam_prob / cl.ham_prob)); + h = 1.0 - s; + } + else { + h = (1.0 - exp(cl.ham_prob / cl.spam_prob)) / + (1.0 + exp(cl.ham_prob / cl.spam_prob)); + s = 1.0 - h; + } + } if (isfinite (s) && isfinite (h)) { final_prob = (s + 1.0 - h) / 2.; |