From 84b57c53f4f6e9b3915d57b4a5c83570fea50dc3 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 16 Nov 2018 17:16:25 +0000 Subject: [Minor] Allow to have bulk learn in the default config --- conf/statistic.conf | 32 ++++++++++++++++++-------------- src/libstat/learn_cache/sqlite3_cache.c | 2 ++ 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/conf/statistic.conf b/conf/statistic.conf index 26e73c4d2..d9b9a1b72 100644 --- a/conf/statistic.conf +++ b/conf/statistic.conf @@ -43,22 +43,26 @@ classifier "bayes" { } learn_condition =<= 0.95 - else - cl = 'ham' - in_class = prob <= 0.05 - end + if not (learn_type and tostring(learn_type) == 'bulk') then + local prob = task:get_mempool():get_variable('bayes_prob', 'double') + + if prob then + local in_class = false + local cl + if is_spam then + cl = 'spam' + in_class = prob >= 0.95 + else + cl = 'ham' + in_class = prob <= 0.05 + end - if in_class then - return false,string.format('already in class %s; probability %.2f%%', - cl, math.abs((prob - 0.5) * 200.0)) + if in_class then + return false,string.format('already in class %s; probability %.2f%%', + cl, math.abs((prob - 0.5) * 200.0)) + end end end diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c index 255c835bb..52921326d 100644 --- a/src/libstat/learn_cache/sqlite3_cache.c +++ b/src/libstat/learn_cache/sqlite3_cache.c @@ -221,6 +221,8 @@ rspamd_stat_cache_sqlite3_check (struct rspamd_task *task, /* We have some existing record in the table */ if (!!flag == !!is_spam) { /* Already learned */ + msg_warn_task ("already seen stat hash: %*bs", + rspamd_cryptobox_HASHBYTES, out); return RSPAMD_LEARN_INGORE; } else { -- cgit v1.2.3