From 217642d356cb16c2a7352a612a682aa63bf26af1 Mon Sep 17 00:00:00 2001 From: Paul Dee Date: Mon, 2 Sep 2019 12:43:48 +0200 Subject: [PATCH] Update bayes.c clarified some messages --- src/libstat/classifiers/bayes.c | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index 38e82d187..bec034cd6 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -80,7 +80,7 @@ inv_chi_square (struct rspamd_task *task, gdouble value, gint freedom_deg) sum = prob; - msg_debug_bayes ("m: %f, prob: %g", m, prob); + msg_debug_bayes ("m: %f, probability: %g", m, prob); /* * m is our confidence in class @@ -91,7 +91,7 @@ inv_chi_square (struct rspamd_task *task, gdouble value, gint freedom_deg) for (i = 1; i < freedom_deg; i++) { prob *= m / (gdouble)i; sum += prob; - msg_debug_bayes ("i=%d, prob: %g, sum: %g", i, prob, sum); + msg_debug_bayes ("i=%d, probability: %g, sum: %g", i, prob, sum); } return MIN (1.0, sum); @@ -197,7 +197,7 @@ bayes_classify_token (struct rspamd_classifier *ctx, if ((bayes_spam_prob > 0.5 && bayes_spam_prob < 0.5 + ctx->cfg->min_prob_strength) || (bayes_spam_prob < 0.5 && bayes_spam_prob > 0.5 - ctx->cfg->min_prob_strength)) { msg_debug_bayes ( - "token %uL <%*s:%*s> skipped, prob not in range: %f", + "token %uL <%*s:%*s> skipped, probability not in range: %f", tok->data, (int) tok->t1->stemmed.len, tok->t1->stemmed.begin, (int) tok->t2->stemmed.len, tok->t2->stemmed.begin, @@ -225,7 +225,7 @@ bayes_classify_token (struct rspamd_classifier *ctx, "spam_count: %ud, ham_count: %ud," "spam_prob: %.3f, ham_prob: %.3f, " "bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, " - "current spam prob: %.3f, current ham prob: %.3f", + "current spam probability: %.3f, current ham probability: %.3f", token_type, tok->data, (int) tok->t1->stemmed.len, tok->t1->stemmed.begin, @@ -241,7 +241,7 @@ bayes_classify_token (struct rspamd_classifier *ctx, "spam_count: %ud, ham_count: %ud," "spam_prob: %.3f, ham_prob: %.3f, " "bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, " - "current spam prob: %.3f, current ham prob: %.3f", + "current spam probability: %.3f, current ham probability: %.3f", token_type, tok->data, fw, w, total_count, spam_count, ham_count, @@ -291,15 +291,15 @@ bayes_classify (struct rspamd_classifier * ctx, /* Check min learns */ if (ctx->cfg->min_learns > 0) { if (ctx->ham_learns < ctx->cfg->min_learns) { - msg_info_task ("skip classification as ham class has not enough " - "learns: %ul, %ud required", + msg_info_task ("not classified as ham. The ham class needs more " + "training samples. Currently: %ul; minimum %ud required", ctx->ham_learns, ctx->cfg->min_learns); return TRUE; } if (ctx->spam_learns < ctx->cfg->min_learns) { - msg_info_task ("skip classification as spam class has not enough " - "learns: %ul, %ud required", + msg_info_task ("not classified as spam. The spam class needs more " + "training samples. Currently: %ul; minimum %ud required", ctx->spam_learns, ctx->cfg->min_learns); return TRUE; @@ -314,8 +314,8 @@ bayes_classify (struct rspamd_classifier * ctx, } if (text_tokens == 0) { - msg_info_task ("skip classification as there are no text tokens, " - "%ud total tokens", + msg_info_task ("skipped classification as there are no text tokens. " + "Total tokens: %ud", tokens->len); return TRUE; @@ -349,7 +349,7 @@ bayes_classify (struct rspamd_classifier * ctx, cl.text_tokens < (gint)(ctx->cfg->min_tokens * 0.1)) { msg_info_bayes ("ignore bayes probability since we have " "found too few text tokens: %uL (of %ud checked), " - "at least %d is required", + "at least %d required", cl.text_tokens, text_tokens, (gint)(ctx->cfg->min_tokens * 0.1)); @@ -379,7 +379,7 @@ bayes_classify (struct rspamd_classifier * ctx, if (isfinite (s) && isfinite (h)) { final_prob = (s + 1.0 - h) / 2.; msg_debug_bayes ( - "got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f," + "got ham probability %.2f -> %.2f and spam probability %.2f -> %.2f," " %L tokens processed of %ud total tokens;" " %uL text tokens found of %ud text tokens)", cl.ham_prob, @@ -398,17 +398,17 @@ bayes_classify (struct rspamd_classifier * ctx, */ if (isfinite (h)) { final_prob = 1.0; - msg_debug_bayes ("spam class is overflowed, as we have no" + msg_debug_bayes ("spam class is full: no" " ham samples"); } else if (isfinite (s)) { final_prob = 0.0; - msg_debug_bayes ("ham class is overflowed, as we have no" + msg_debug_bayes ("ham class is full: no" " spam samples"); } else { final_prob = 0.5; - msg_warn_bayes ("spam and ham classes are both overflowed"); + msg_warn_bayes ("spam and ham classes are both full"); } } @@ -553,4 +553,4 @@ bayes_learn_spam (struct rspamd_classifier * ctx, } return TRUE; -} \ No newline at end of file +} -- 2.39.5