about | summary | refs | log | tree | commit | diff | stats
path: root/src/libstat
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-11-16 12:12:23 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-11-16 12:12:23 +0000
commit: 86bf20929247329a022faa7b0384c20fac0a5079 (patch)
tree: b61b42924e3a1fa339b63fe5edb5c152c2b482ee /src/libstat
parent: d302edad6a90062424df6883b0df9a3cb9325870 (diff)
download: rspamd-86bf20929247329a022faa7b0384c20fac0a5079.tar.gz
download: rspamd-86bf20929247329a022faa7b0384c20fac0a5079.zip
[Rework] Improve bayes debug logging, remove unused stuff
Diffstat (limited to 'src/libstat')
-rw-r--r-- src/libstat/classifiers/bayes.c | 11
-rw-r--r-- src/libstat/classifiers/classifiers.h | 38
-rw-r--r-- src/libstat/classifiers/lua_classifier.c | 15
-rw-r--r-- src/libstat/stat_config.c | 11
-rw-r--r-- src/libstat/stat_internal.h | 1
-rw-r--r-- src/libstat/stat_process.c | 140
6 files changed, 63 insertions, 153 deletions
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index ee2125457..edaae4e79 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -38,7 +38,7 @@
G_STRFUNC, \
__VA_ARGS__)
-INIT_LOG_MODULE(bayes)
+INIT_LOG_MODULE_PUBLIC(bayes)
static inline GQuark
bayes_error_quark (void)
@@ -254,13 +254,20 @@ bayes_classify_token (struct rspamd_classifier *ctx,
gboolean
-bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier *cl)
+bayes_init (struct rspamd_config *cfg,
+ struct event_base *ev_base,
+ struct rspamd_classifier *cl)
{
cl->cfg->flags |= RSPAMD_FLAG_CLASSIFIER_INTEGER;
return TRUE;
}
+void
+bayes_fin (struct rspamd_classifier *cl)
+{
+}
+
gboolean
bayes_classify (struct rspamd_classifier * ctx,
GPtrArray *tokens,
diff --git a/src/libstat/classifiers/classifiers.h b/src/libstat/classifiers/classifiers.h
index e30f2153a..fd6daf433 100644
--- a/src/libstat/classifiers/classifiers.h
+++ b/src/libstat/classifiers/classifiers.h
@@ -3,6 +3,7 @@
#include "config.h"
#include "mem_pool.h"
+#include <event.h>
#define RSPAMD_DEFAULT_CLASSIFIER "bayes"
/* Consider this value as 0 */
@@ -10,28 +11,32 @@
struct rspamd_classifier_config;
struct rspamd_task;
+struct rspamd_config;
struct rspamd_classifier;
struct token_node_s;
struct rspamd_stat_classifier {
char *name;
- gboolean (*init_func)(rspamd_mempool_t *pool,
- struct rspamd_classifier *cl);
+ gboolean (*init_func)(struct rspamd_config *cfg,
+ struct event_base *ev_base,
+ struct rspamd_classifier *cl);
gboolean (*classify_func)(struct rspamd_classifier * ctx,
- GPtrArray *tokens,
- struct rspamd_task *task);
+ GPtrArray *tokens,
+ struct rspamd_task *task);
gboolean (*learn_spam_func)(struct rspamd_classifier * ctx,
- GPtrArray *input,
- struct rspamd_task *task,
- gboolean is_spam,
- gboolean unlearn,
- GError **err);
+ GPtrArray *input,
+ struct rspamd_task *task,
+ gboolean is_spam,
+ gboolean unlearn,
+ GError **err);
+ void (*fin_func)(struct rspamd_classifier *cl);
};
/* Bayes algorithm */
-gboolean bayes_init (rspamd_mempool_t *pool,
- struct rspamd_classifier *);
+gboolean bayes_init (struct rspamd_config *cfg,
+ struct event_base *ev_base,
+ struct rspamd_classifier *);
gboolean bayes_classify (struct rspamd_classifier *ctx,
GPtrArray *tokens,
struct rspamd_task *task);
@@ -41,10 +46,12 @@ gboolean bayes_learn_spam (struct rspamd_classifier *ctx,
gboolean is_spam,
gboolean unlearn,
GError **err);
+void bayes_fin (struct rspamd_classifier *);
/* Generic lua classifier */
-gboolean lua_classifier_init (rspamd_mempool_t *pool,
- struct rspamd_classifier *);
+gboolean lua_classifier_init (struct rspamd_config *cfg,
+ struct event_base *ev_base,
+ struct rspamd_classifier *);
gboolean lua_classifier_classify (struct rspamd_classifier *ctx,
GPtrArray *tokens,
struct rspamd_task *task);
@@ -55,6 +62,11 @@ gboolean lua_classifier_learn_spam (struct rspamd_classifier *ctx,
gboolean unlearn,
GError **err);
+extern guint rspamd_bayes_log_id;
+#define msg_debug_bayes(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \
+ rspamd_bayes_log_id, "bayes", task->task_pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
#endif
/*
diff --git a/src/libstat/classifiers/lua_classifier.c b/src/libstat/classifiers/lua_classifier.c
index 7b495b165..83ce7b0e1 100644
--- a/src/libstat/classifiers/lua_classifier.c
+++ b/src/libstat/classifiers/lua_classifier.c
@@ -47,8 +47,9 @@ static GHashTable *lua_classifiers = NULL;
INIT_LOG_MODULE(luacl)
gboolean
-lua_classifier_init (rspamd_mempool_t *pool,
- struct rspamd_classifier *cl)
+lua_classifier_init (struct rspamd_config *cfg,
+ struct event_base *ev_base,
+ struct rspamd_classifier *cl)
{
struct rspamd_lua_classifier_ctx *ctx;
lua_State *L = cl->ctx->cfg->lua_state;
@@ -62,7 +63,7 @@ lua_classifier_init (rspamd_mempool_t *pool,
ctx = g_hash_table_lookup (lua_classifiers, cl->subrs->name);
if (ctx != NULL) {
- msg_err_pool ("duplicate lua classifier definition: %s",
+ msg_err_config ("duplicate lua classifier definition: %s",
cl->subrs->name);
return FALSE;
@@ -70,7 +71,7 @@ lua_classifier_init (rspamd_mempool_t *pool,
lua_getglobal (L, "rspamd_classifiers");
if (lua_type (L, -1) != LUA_TTABLE) {
- msg_err_pool ("cannot register classifier %s: no rspamd_classifier global",
+ msg_err_config ("cannot register classifier %s: no rspamd_classifier global",
cl->subrs->name);
lua_pop (L, 1);
@@ -81,7 +82,7 @@ lua_classifier_init (rspamd_mempool_t *pool,
lua_gettable (L, -2);
if (lua_type (L, -1) != LUA_TTABLE) {
- msg_err_pool ("cannot register classifier %s: bad lua type: %s",
+ msg_err_config ("cannot register classifier %s: bad lua type: %s",
cl->subrs->name, lua_typename (L, lua_type (L, -1)));
lua_pop (L, 2);
@@ -92,7 +93,7 @@ lua_classifier_init (rspamd_mempool_t *pool,
lua_gettable (L, -2);
if (lua_type (L, -1) != LUA_TFUNCTION) {
- msg_err_pool ("cannot register classifier %s: bad lua type for classify: %s",
+ msg_err_config ("cannot register classifier %s: bad lua type for classify: %s",
cl->subrs->name, lua_typename (L, lua_type (L, -1)));
lua_pop (L, 3);
@@ -105,7 +106,7 @@ lua_classifier_init (rspamd_mempool_t *pool,
lua_gettable (L, -2);
if (lua_type (L, -1) != LUA_TFUNCTION) {
- msg_err_pool ("cannot register classifier %s: bad lua type for learn: %s",
+ msg_err_config ("cannot register classifier %s: bad lua type for learn: %s",
cl->subrs->name, lua_typename (L, lua_type (L, -1)));
lua_pop (L, 3);
diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c
index 9d1e57f13..d2772e9ca 100644
--- a/src/libstat/stat_config.c
+++ b/src/libstat/stat_config.c
@@ -28,6 +28,7 @@ static struct rspamd_stat_classifier lua_classifier = {
.init_func = lua_classifier_init,
.classify_func = lua_classifier_classify,
.learn_spam_func = lua_classifier_learn_spam,
+ .fin_func = NULL,
};
static struct rspamd_stat_classifier stat_classifiers[] = {
@@ -36,6 +37,7 @@ static struct rspamd_stat_classifier stat_classifiers[] = {
.init_func = bayes_init,
.classify_func = bayes_classify,
.learn_spam_func = bayes_learn_spam,
+ .fin_func = bayes_fin,
}
};
@@ -182,7 +184,7 @@ rspamd_stat_init (struct rspamd_config *cfg, struct event_base *ev_base)
continue;
}
- if (!cl->subrs->init_func (cfg->cfg_pool, cl)) {
+ if (!cl->subrs->init_func (cfg, ev_base, cl)) {
g_free (cl);
msg_err_config ("cannot init classifier type %s", clf->name);
cur = g_list_next (cur);
@@ -328,6 +330,11 @@ rspamd_stat_close (void)
}
g_array_free (cl->statfiles_ids, TRUE);
+
+ if (cl->subrs->fin_func) {
+ cl->subrs->fin_func (cl);
+ }
+
g_free (cl);
}
@@ -475,11 +482,11 @@ rspamd_stat_ctx_register_async (rspamd_stat_async_handler handler,
g_assert (st_ctx != NULL);
elt = g_malloc0 (sizeof (*elt));
- REF_INIT_RETAIN (elt, rspamd_async_elt_dtor);
elt->handler = handler;
elt->cleanup = cleanup;
elt->ud = d;
elt->timeout = timeout;
+ REF_INIT_RETAIN (elt, rspamd_async_elt_dtor);
/* Enabled by default */
diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h
index 44f48ae5a..746199d45 100644
--- a/src/libstat/stat_internal.h
+++ b/src/libstat/stat_internal.h
@@ -41,6 +41,7 @@ struct rspamd_classifier {
gulong ham_learns;
struct rspamd_classifier_config *cfg;
struct rspamd_stat_classifier *subrs;
+ gpointer specific;
};
struct rspamd_statfile {
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index e4f95a514..d07e24156 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -63,7 +63,7 @@ rspamd_stat_tokenize_header (struct rspamd_task *task,
}
}
- msg_debug_task ("added stat tokens for header '%s'", name);
+ msg_debug_bayes ("added stat tokens for header '%s'", name);
}
}
@@ -114,7 +114,7 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx,
g_array_append_val (ar, elt);
}
- msg_debug_task ("added stat tokens for image '%s'", img->html_image->src);
+ msg_debug_bayes ("added stat tokens for image '%s'", img->html_image->src);
}
}
else if (part->cd && part->cd->filename.len > 0) {
@@ -133,7 +133,7 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx,
elt.len = part->ct->boundary.len;
if (elt.len) {
- msg_debug_task ("added stat tokens for mime boundary '%*s'",
+ msg_debug_bayes ("added stat tokens for mime boundary '%*s'",
(gint)elt.len, elt.begin);
g_array_append_val (ar, elt);
}
@@ -155,13 +155,13 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx,
if (tp->language != NULL && tp->language[0] != '\0') {
elt.begin = (gchar *)tp->language;
elt.len = strlen (elt.begin);
- msg_debug_task ("added stat tokens for part language '%s'", elt.begin);
+ msg_debug_bayes ("added stat tokens for part language '%s'", elt.begin);
g_array_append_val (ar, elt);
}
if (tp->real_charset != NULL) {
elt.begin = (gchar *)tp->real_charset;
elt.len = strlen (elt.begin);
- msg_debug_task ("added stat tokens for part charset '%s'", elt.begin);
+ msg_debug_bayes ("added stat tokens for part charset '%s'", elt.begin);
g_array_append_val (ar, elt);
}
}
@@ -184,124 +184,6 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx,
g_array_append_val (ar, elt);
}
- /* Use more precise headers order */
-#if 0
- cur = g_list_first (task->headers_order->head);
- while (cur) {
- hdr = cur->data;
-
- if (hdr->name && hdr->type != RSPAMD_HEADER_RECEIVED) {
- elt.begin = hdr->name;
- elt.len = strlen (hdr->name);
- g_array_append_val (ar, elt);
- }
-
- cur = g_list_next (cur);
- }
-#endif
-
- /* Use metatokens plugin from Lua */
- lua_getglobal (L, "rspamd_plugins");
-
- if (lua_type (L, -1) == LUA_TTABLE) {
- lua_pushstring (L, "stat_metatokens");
- lua_gettable (L, -2);
-
- if (lua_type (L, -1) == LUA_TTABLE) {
- gint old_top;
-
- old_top = lua_gettop (L);
- lua_pushstring (L, "callback");
- lua_gettable (L, -2);
-
- if (lua_type (L, -1) == LUA_TFUNCTION) {
- struct rspamd_task **ptask;
-
- ptask = lua_newuserdata (L, sizeof (*ptask));
- rspamd_lua_setclass (L, "rspamd{task}", -1);
- *ptask = task;
-
- if (lua_pcall (L, 1, LUA_MULTRET, 0) != 0) {
- msg_err_task ("stat_metatokens failed: %s",
- lua_tostring (L, -1));
- lua_pop (L, 1);
- } else {
- if (lua_gettop (L) > old_top &&
- lua_istable (L, old_top + 1)) {
- lua_pushvalue (L, old_top + 1);
- /* Iterate over table of tables */
- for (lua_pushnil (L); lua_next (L, -2);
- lua_pop (L, 1)) {
- elt.flags = RSPAMD_STAT_TOKEN_FLAG_META|
- RSPAMD_STAT_TOKEN_FLAG_LUA_META;
-
- if (lua_isnumber (L, -1)) {
- gdouble num = lua_tonumber (L, -1);
- guint8 *pnum = rspamd_mempool_alloc (
- task->task_pool,
- sizeof (num));
-
- msg_debug_task ("got metatoken number: %.2f",
- num);
- memcpy (pnum, &num, sizeof (num));
- elt.begin = (gchar *) pnum;
- elt.len = sizeof (num);
- g_array_append_val (ar, elt);
- } else if (lua_isstring (L, -1)) {
- const gchar *str;
- gsize tlen;
-
- str = lua_tolstring (L, -1, &tlen);
- guint8 *pstr = rspamd_mempool_alloc (
- task->task_pool,
- tlen);
- memcpy (pstr, str, tlen);
-
- msg_debug_task ("got metatoken string: %*s",
- (gint) tlen, str);
- elt.begin = (gchar *) pstr;
- elt.len = tlen;
- g_array_append_val (ar, elt);
- }
- else if (lua_istable (L, -1)) {
- /* Treat that as unigramms */
- for (lua_pushnil (L); lua_next (L, -2);
- lua_pop (L, 1)) {
- if (lua_isstring (L, -1)) {
- const gchar *str;
- gsize tlen;
-
- str = lua_tolstring (L, -1, &tlen);
- guint8 *pstr = rspamd_mempool_alloc (
- task->task_pool,
- tlen);
- memcpy (pstr, str, tlen);
-
- msg_debug_task ("got unigramm "
- "metatoken string: %*s",
- (gint) tlen, str);
- elt.begin = (gchar *) pstr;
- elt.len = tlen;
- elt.flags |= RSPAMD_STAT_TOKEN_FLAG_UNIGRAM;
- g_array_append_val (ar, elt);
- }
- }
- }
- }
- }
- }
- }
- }
- }
-
- lua_settop (L, 0);
- st_ctx->tokenizer->tokenize_func (st_ctx,
- task,
- ar,
- TRUE,
- "META:",
- task->tokens);
-
rspamd_mempool_add_destructor (task->task_pool,
rspamd_array_free_hard, ar);
}
@@ -354,7 +236,7 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
if (pdiff != NULL && (1.0 - *pdiff) * 100.0 > similarity_treshold) {
- msg_debug_task ("message has two common parts (%.2f), so skip the last one",
+ msg_debug_bayes ("message has two common parts (%.2f), so skip the last one",
*pdiff);
break;
}
@@ -425,7 +307,7 @@ rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx,
if (!rspamd_symcache_is_symbol_enabled (task, task->cfg->cache,
st->stcf->symbol)) {
g_ptr_array_index (task->stat_runtimes, i) = NULL;
- msg_debug_task ("symbol %s is disabled, skip classification",
+ msg_debug_bayes ("symbol %s is disabled, skip classification",
st->stcf->symbol);
continue;
}
@@ -574,7 +456,7 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx,
if (bk_run == NULL) {
skip = TRUE;
- msg_debug_task ("disable classifier %s as statfile symbol %s is disabled",
+ msg_debug_bayes ("disable classifier %s as statfile symbol %s is disabled",
cl->cfg->name, st->stcf->symbol);
break;
}
@@ -583,7 +465,7 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx,
if (!skip) {
if (cl->cfg->min_tokens > 0 && task->tokens->len < cl->cfg->min_tokens) {
- msg_debug_task (
+ msg_debug_bayes (
"<%s> contains less tokens than required for %s classifier: "
"%ud < %ud",
task->message_id,
@@ -593,7 +475,7 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx,
continue;
}
else if (cl->cfg->max_tokens > 0 && task->tokens->len > cl->cfg->max_tokens) {
- msg_debug_task (
+ msg_debug_bayes (
"<%s> contains more tokens than allowed for %s classifier: "
"%ud > %ud",
task->message_id,
@@ -1090,7 +972,7 @@ rspamd_stat_has_classifier_symbols (struct rspamd_task *task,
if (rspamd_task_find_symbol_result (task, st->stcf->symbol)) {
if (is_spam == !!st->stcf->is_spam) {
- msg_debug_task ("do not autolearn %s as symbol %s is already "
+ msg_debug_bayes ("do not autolearn %s as symbol %s is already "
"added", is_spam ? "spam" : "ham", st->stcf->symbol);
return TRUE;