From 49caa5a69eebb62b55d539d0ae7c2f4bd1a106f0 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 16 Nov 2018 17:41:40 +0000 Subject: [PATCH] [Minor] Fix some issues and add debugging --- lualib/lua_stat.lua | 4 ++-- src/libstat/classifiers/bayes.c | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/lualib/lua_stat.lua b/lualib/lua_stat.lua index 4d6fef7a8..2b43defb6 100644 --- a/lualib/lua_stat.lua +++ b/lualib/lua_stat.lua @@ -621,12 +621,12 @@ local function get_mime_stat_tokens(task, res, i) online_text = true end - rawset(res, i, "#lang:" .. tp:get_language() or 'unk') + rawset(res, i, "#lang:" .. (tp:get_language() or 'unk')) lua_util.debugm("bayes", task, "added language: %s", res[i]) i = i + 1 - rawset(res, i, "#cs:" .. tp:get_charset() or 'unk') + rawset(res, i, "#cs:" .. (tp:get_charset() or 'unk')) lua_util.debugm("bayes", task, "added charset: %s", res[i]) i = i + 1 diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index edaae4e79..934c8d941 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -337,7 +337,9 @@ bayes_classify (struct rspamd_classifier * ctx, } if (cl.processed_tokens == 0) { - msg_info_bayes ("no tokens found in bayes database, ignore stats"); + msg_info_bayes ("no tokens found in bayes database " + "(%ud total tokens, %ud text tokens), ignore stats", + tokens->len, text_tokens); return TRUE; } @@ -345,8 +347,11 @@ bayes_classify (struct rspamd_classifier * ctx, if (ctx->cfg->min_tokens > 0 && cl.text_tokens < (gint)(ctx->cfg->min_tokens * 0.1)) { msg_info_bayes ("ignore bayes probability since we have " - "too few text tokens: %uL, at least %d is required", - cl.text_tokens, (gint)(ctx->cfg->min_tokens * 0.1)); + "found too few text tokens: %uL (of %ud checked), " + "at least %d is required", + cl.text_tokens, + text_tokens, + (gint)(ctx->cfg->min_tokens * 0.1)); return TRUE; } @@ -374,7 +379,8 @@ bayes_classify (struct rspamd_classifier * ctx, final_prob = (s + 1.0 - h) / 2.; msg_debug_bayes ( "<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f," - " %L tokens processed of %ud total tokens (%uL text tokens)", + " %L tokens processed of %ud total tokens;" + " %uL text tokens found of %ud text tokens)", task->message_id, cl.ham_prob, h, @@ -382,7 +388,8 @@ bayes_classify (struct rspamd_classifier * ctx, s, cl.processed_tokens, tokens->len, - cl.text_tokens); + cl.text_tokens, + text_tokens); } else { /* -- 2.39.5