diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-16 17:41:40 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-16 17:41:40 +0000 |
commit | 49caa5a69eebb62b55d539d0ae7c2f4bd1a106f0 (patch) | |
tree | d34c17275f0e238c2f53311fae2e25202a1b79e0 | |
parent | 2a0763e955fcbfa2c61e28faaa71765a8fc685e6 (diff) | |
download | rspamd-49caa5a69eebb62b55d539d0ae7c2f4bd1a106f0.tar.gz rspamd-49caa5a69eebb62b55d539d0ae7c2f4bd1a106f0.zip |
[Minor] Fix some issues and add debugging
-rw-r--r-- | lualib/lua_stat.lua | 4 | ||||
-rw-r--r-- | src/libstat/classifiers/bayes.c | 17 |
2 files changed, 14 insertions, 7 deletions
```diff
diff --git a/lualib/lua_stat.lua b/lualib/lua_stat.lua
index 4d6fef7a8..2b43defb6 100644
--- a/lualib/lua_stat.lua
+++ b/lualib/lua_stat.lua
@@ -621,12 +621,12 @@ local function get_mime_stat_tokens(task, res, i)
         online_text = true
       end
 
-      rawset(res, i, "#lang:" .. tp:get_language() or 'unk')
+      rawset(res, i, "#lang:" .. (tp:get_language() or 'unk'))
       lua_util.debugm("bayes", task, "added language: %s", res[i])
       i = i + 1
 
-      rawset(res, i, "#cs:" .. tp:get_charset() or 'unk')
+      rawset(res, i, "#cs:" .. (tp:get_charset() or 'unk'))
       lua_util.debugm("bayes", task, "added charset: %s", res[i])
       i = i + 1
 
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index edaae4e79..934c8d941 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -337,7 +337,9 @@ bayes_classify (struct rspamd_classifier * ctx,
     }
 
     if (cl.processed_tokens == 0) {
-        msg_info_bayes ("no tokens found in bayes database, ignore stats");
+        msg_info_bayes ("no tokens found in bayes database "
+                "(%ud total tokens, %ud text tokens), ignore stats",
+                tokens->len, text_tokens);
 
         return TRUE;
     }
@@ -345,8 +347,11 @@ bayes_classify (struct rspamd_classifier * ctx,
     if (ctx->cfg->min_tokens > 0 &&
             cl.text_tokens < (gint)(ctx->cfg->min_tokens * 0.1)) {
         msg_info_bayes ("ignore bayes probability since we have "
-                "too few text tokens: %uL, at least %d is required",
-                cl.text_tokens, (gint)(ctx->cfg->min_tokens * 0.1));
+                "found too few text tokens: %uL (of %ud checked), "
+                "at least %d is required",
+                cl.text_tokens,
+                text_tokens,
+                (gint)(ctx->cfg->min_tokens * 0.1));
 
         return TRUE;
     }
@@ -374,7 +379,8 @@ bayes_classify (struct rspamd_classifier * ctx,
         final_prob = (s + 1.0 - h) / 2.;
         msg_debug_bayes (
                 "<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f,"
-                " %L tokens processed of %ud total tokens (%uL text tokens)",
+                " %L tokens processed of %ud total tokens;"
+                " %uL text tokens found of %ud text tokens)",
                 task->message_id,
                 cl.ham_prob,
                 h,
@@ -382,7 +388,8 @@ bayes_classify (struct rspamd_classifier * ctx,
                 s,
                 cl.processed_tokens,
                 tokens->len,
-                cl.text_tokens);
+                cl.text_tokens,
+                text_tokens);
     }
     else {
         /*
```