diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-16 12:12:23 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-16 12:12:23 +0000 |
commit | 86bf20929247329a022faa7b0384c20fac0a5079 (patch) | |
tree | b61b42924e3a1fa339b63fe5edb5c152c2b482ee /src/libstat | |
parent | d302edad6a90062424df6883b0df9a3cb9325870 (diff) | |
download | rspamd-86bf20929247329a022faa7b0384c20fac0a5079.tar.gz rspamd-86bf20929247329a022faa7b0384c20fac0a5079.zip |
[Rework] Improve bayes debug logging, remove unused stuff
Diffstat (limited to 'src/libstat')
-rw-r--r-- | src/libstat/classifiers/bayes.c | 11 | ||||
-rw-r--r-- | src/libstat/classifiers/classifiers.h | 38 | ||||
-rw-r--r-- | src/libstat/classifiers/lua_classifier.c | 15 | ||||
-rw-r--r-- | src/libstat/stat_config.c | 11 | ||||
-rw-r--r-- | src/libstat/stat_internal.h | 1 | ||||
-rw-r--r-- | src/libstat/stat_process.c | 140 |
6 files changed, 63 insertions, 153 deletions
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index ee2125457..edaae4e79 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -38,7 +38,7 @@ G_STRFUNC, \ __VA_ARGS__) -INIT_LOG_MODULE(bayes) +INIT_LOG_MODULE_PUBLIC(bayes) static inline GQuark bayes_error_quark (void) @@ -254,13 +254,20 @@ bayes_classify_token (struct rspamd_classifier *ctx, gboolean -bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier *cl) +bayes_init (struct rspamd_config *cfg, + struct event_base *ev_base, + struct rspamd_classifier *cl) { cl->cfg->flags |= RSPAMD_FLAG_CLASSIFIER_INTEGER; return TRUE; } +void +bayes_fin (struct rspamd_classifier *cl) +{ +} + gboolean bayes_classify (struct rspamd_classifier * ctx, GPtrArray *tokens, diff --git a/src/libstat/classifiers/classifiers.h b/src/libstat/classifiers/classifiers.h index e30f2153a..fd6daf433 100644 --- a/src/libstat/classifiers/classifiers.h +++ b/src/libstat/classifiers/classifiers.h @@ -3,6 +3,7 @@ #include "config.h" #include "mem_pool.h" +#include <event.h> #define RSPAMD_DEFAULT_CLASSIFIER "bayes" /* Consider this value as 0 */ @@ -10,28 +11,32 @@ struct rspamd_classifier_config; struct rspamd_task; +struct rspamd_config; struct rspamd_classifier; struct token_node_s; struct rspamd_stat_classifier { char *name; - gboolean (*init_func)(rspamd_mempool_t *pool, - struct rspamd_classifier *cl); + gboolean (*init_func)(struct rspamd_config *cfg, + struct event_base *ev_base, + struct rspamd_classifier *cl); gboolean (*classify_func)(struct rspamd_classifier * ctx, - GPtrArray *tokens, - struct rspamd_task *task); + GPtrArray *tokens, + struct rspamd_task *task); gboolean (*learn_spam_func)(struct rspamd_classifier * ctx, - GPtrArray *input, - struct rspamd_task *task, - gboolean is_spam, - gboolean unlearn, - GError **err); + GPtrArray *input, + struct rspamd_task *task, + gboolean is_spam, + gboolean unlearn, + GError **err); + void (*fin_func)(struct rspamd_classifier *cl); }; /* Bayes algorithm */ -gboolean bayes_init (rspamd_mempool_t *pool, - struct rspamd_classifier *); +gboolean bayes_init (struct rspamd_config *cfg, + struct event_base *ev_base, + struct rspamd_classifier *); gboolean bayes_classify (struct rspamd_classifier *ctx, GPtrArray *tokens, struct rspamd_task *task); @@ -41,10 +46,12 @@ gboolean bayes_learn_spam (struct rspamd_classifier *ctx, gboolean is_spam, gboolean unlearn, GError **err); +void bayes_fin (struct rspamd_classifier *); /* Generic lua classifier */ -gboolean lua_classifier_init (rspamd_mempool_t *pool, - struct rspamd_classifier *); +gboolean lua_classifier_init (struct rspamd_config *cfg, + struct event_base *ev_base, + struct rspamd_classifier *); gboolean lua_classifier_classify (struct rspamd_classifier *ctx, GPtrArray *tokens, struct rspamd_task *task); @@ -55,6 +62,11 @@ gboolean lua_classifier_learn_spam (struct rspamd_classifier *ctx, gboolean unlearn, GError **err); +extern guint rspamd_bayes_log_id; +#define msg_debug_bayes(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \ + rspamd_bayes_log_id, "bayes", task->task_pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) #endif /* diff --git a/src/libstat/classifiers/lua_classifier.c b/src/libstat/classifiers/lua_classifier.c index 7b495b165..83ce7b0e1 100644 --- a/src/libstat/classifiers/lua_classifier.c +++ b/src/libstat/classifiers/lua_classifier.c @@ -47,8 +47,9 @@ static GHashTable *lua_classifiers = NULL; INIT_LOG_MODULE(luacl) gboolean -lua_classifier_init (rspamd_mempool_t *pool, - struct rspamd_classifier *cl) +lua_classifier_init (struct rspamd_config *cfg, + struct event_base *ev_base, + struct rspamd_classifier *cl) { struct rspamd_lua_classifier_ctx *ctx; lua_State *L = cl->ctx->cfg->lua_state; @@ -62,7 +63,7 @@ lua_classifier_init (rspamd_mempool_t *pool, ctx = g_hash_table_lookup (lua_classifiers, cl->subrs->name); if (ctx != NULL) { - msg_err_pool ("duplicate lua classifier definition: %s", + msg_err_config ("duplicate lua classifier definition: %s", cl->subrs->name); return FALSE; @@ -70,7 +71,7 @@ lua_classifier_init (rspamd_mempool_t *pool, lua_getglobal (L, "rspamd_classifiers"); if (lua_type (L, -1) != LUA_TTABLE) { - msg_err_pool ("cannot register classifier %s: no rspamd_classifier global", + msg_err_config ("cannot register classifier %s: no rspamd_classifier global", cl->subrs->name); lua_pop (L, 1); @@ -81,7 +82,7 @@ lua_classifier_init (rspamd_mempool_t *pool, lua_gettable (L, -2); if (lua_type (L, -1) != LUA_TTABLE) { - msg_err_pool ("cannot register classifier %s: bad lua type: %s", + msg_err_config ("cannot register classifier %s: bad lua type: %s", cl->subrs->name, lua_typename (L, lua_type (L, -1))); lua_pop (L, 2); @@ -92,7 +93,7 @@ lua_classifier_init (rspamd_mempool_t *pool, lua_gettable (L, -2); if (lua_type (L, -1) != LUA_TFUNCTION) { - msg_err_pool ("cannot register classifier %s: bad lua type for classify: %s", + msg_err_config ("cannot register classifier %s: bad lua type for classify: %s", cl->subrs->name, lua_typename (L, lua_type (L, -1))); lua_pop (L, 3); @@ -105,7 +106,7 @@ lua_classifier_init (rspamd_mempool_t *pool, lua_gettable (L, -2); if (lua_type (L, -1) != LUA_TFUNCTION) { - msg_err_pool ("cannot register classifier %s: bad lua type for learn: %s", + msg_err_config ("cannot register classifier %s: bad lua type for learn: %s", cl->subrs->name, lua_typename (L, lua_type (L, -1))); lua_pop (L, 3); diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c index 9d1e57f13..d2772e9ca 100644 --- a/src/libstat/stat_config.c +++ b/src/libstat/stat_config.c @@ -28,6 +28,7 @@ static struct rspamd_stat_classifier lua_classifier = { .init_func = lua_classifier_init, .classify_func = lua_classifier_classify, .learn_spam_func = lua_classifier_learn_spam, + .fin_func = NULL, }; static struct rspamd_stat_classifier stat_classifiers[] = { @@ -36,6 +37,7 @@ static struct rspamd_stat_classifier stat_classifiers[] = { .init_func = bayes_init, .classify_func = bayes_classify, .learn_spam_func = bayes_learn_spam, + .fin_func = bayes_fin, } }; @@ -182,7 +184,7 @@ rspamd_stat_init (struct rspamd_config *cfg, struct event_base *ev_base) continue; } - if (!cl->subrs->init_func (cfg->cfg_pool, cl)) { + if (!cl->subrs->init_func (cfg, ev_base, cl)) { g_free (cl); msg_err_config ("cannot init classifier type %s", clf->name); cur = g_list_next (cur); @@ -328,6 +330,11 @@ rspamd_stat_close (void) } g_array_free (cl->statfiles_ids, TRUE); + + if (cl->subrs->fin_func) { + cl->subrs->fin_func (cl); + } + g_free (cl); } @@ -475,11 +482,11 @@ rspamd_stat_ctx_register_async (rspamd_stat_async_handler handler, g_assert (st_ctx != NULL); elt = g_malloc0 (sizeof (*elt)); - REF_INIT_RETAIN (elt, rspamd_async_elt_dtor); elt->handler = handler; elt->cleanup = cleanup; elt->ud = d; elt->timeout = timeout; + REF_INIT_RETAIN (elt, rspamd_async_elt_dtor); /* Enabled by default */ diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h index 44f48ae5a..746199d45 100644 --- a/src/libstat/stat_internal.h +++ b/src/libstat/stat_internal.h @@ -41,6 +41,7 @@ struct rspamd_classifier { gulong ham_learns; struct rspamd_classifier_config *cfg; struct rspamd_stat_classifier *subrs; + gpointer specific; }; struct rspamd_statfile { diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index e4f95a514..d07e24156 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -63,7 +63,7 @@ rspamd_stat_tokenize_header (struct rspamd_task *task, } } - msg_debug_task ("added stat tokens for header '%s'", name); + msg_debug_bayes ("added stat tokens for header '%s'", name); } } @@ -114,7 +114,7 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx, g_array_append_val (ar, elt); } - msg_debug_task ("added stat tokens for image '%s'", img->html_image->src); + msg_debug_bayes ("added stat tokens for image '%s'", img->html_image->src); } } else if (part->cd && part->cd->filename.len > 0) { @@ -133,7 +133,7 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx, elt.len = part->ct->boundary.len; if (elt.len) { - msg_debug_task ("added stat tokens for mime boundary '%*s'", + msg_debug_bayes ("added stat tokens for mime boundary '%*s'", (gint)elt.len, elt.begin); g_array_append_val (ar, elt); } @@ -155,13 +155,13 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx, if (tp->language != NULL && tp->language[0] != '\0') { elt.begin = (gchar *)tp->language; elt.len = strlen (elt.begin); - msg_debug_task ("added stat tokens for part language '%s'", elt.begin); + msg_debug_bayes ("added stat tokens for part language '%s'", elt.begin); g_array_append_val (ar, elt); } if (tp->real_charset != NULL) { elt.begin = (gchar *)tp->real_charset; elt.len = strlen (elt.begin); - msg_debug_task ("added stat tokens for part charset '%s'", elt.begin); + msg_debug_bayes ("added stat tokens for part charset '%s'", elt.begin); g_array_append_val (ar, elt); } } @@ -184,124 +184,6 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx, g_array_append_val (ar, elt); } - /* Use more precise headers order */ -#if 0 - cur = g_list_first (task->headers_order->head); - while (cur) { - hdr = cur->data; - - if (hdr->name && hdr->type != RSPAMD_HEADER_RECEIVED) { - elt.begin = hdr->name; - elt.len = strlen (hdr->name); - g_array_append_val (ar, elt); - } - - cur = g_list_next (cur); - } -#endif - - /* Use metatokens plugin from Lua */ - lua_getglobal (L, "rspamd_plugins"); - - if (lua_type (L, -1) == LUA_TTABLE) { - lua_pushstring (L, "stat_metatokens"); - lua_gettable (L, -2); - - if (lua_type (L, -1) == LUA_TTABLE) { - gint old_top; - - old_top = lua_gettop (L); - lua_pushstring (L, "callback"); - lua_gettable (L, -2); - - if (lua_type (L, -1) == LUA_TFUNCTION) { - struct rspamd_task **ptask; - - ptask = lua_newuserdata (L, sizeof (*ptask)); - rspamd_lua_setclass (L, "rspamd{task}", -1); - *ptask = task; - - if (lua_pcall (L, 1, LUA_MULTRET, 0) != 0) { - msg_err_task ("stat_metatokens failed: %s", - lua_tostring (L, -1)); - lua_pop (L, 1); - } else { - if (lua_gettop (L) > old_top && - lua_istable (L, old_top + 1)) { - lua_pushvalue (L, old_top + 1); - /* Iterate over table of tables */ - for (lua_pushnil (L); lua_next (L, -2); - lua_pop (L, 1)) { - elt.flags = RSPAMD_STAT_TOKEN_FLAG_META| - RSPAMD_STAT_TOKEN_FLAG_LUA_META; - - if (lua_isnumber (L, -1)) { - gdouble num = lua_tonumber (L, -1); - guint8 *pnum = rspamd_mempool_alloc ( - task->task_pool, - sizeof (num)); - - msg_debug_task ("got metatoken number: %.2f", - num); - memcpy (pnum, &num, sizeof (num)); - elt.begin = (gchar *) pnum; - elt.len = sizeof (num); - g_array_append_val (ar, elt); - } else if (lua_isstring (L, -1)) { - const gchar *str; - gsize tlen; - - str = lua_tolstring (L, -1, &tlen); - guint8 *pstr = rspamd_mempool_alloc ( - task->task_pool, - tlen); - memcpy (pstr, str, tlen); - - msg_debug_task ("got metatoken string: %*s", - (gint) tlen, str); - elt.begin = (gchar *) pstr; - elt.len = tlen; - g_array_append_val (ar, elt); - } - else if (lua_istable (L, -1)) { - /* Treat that as unigramms */ - for (lua_pushnil (L); lua_next (L, -2); - lua_pop (L, 1)) { - if (lua_isstring (L, -1)) { - const gchar *str; - gsize tlen; - - str = lua_tolstring (L, -1, &tlen); - guint8 *pstr = rspamd_mempool_alloc ( - task->task_pool, - tlen); - memcpy (pstr, str, tlen); - - msg_debug_task ("got unigramm " - "metatoken string: %*s", - (gint) tlen, str); - elt.begin = (gchar *) pstr; - elt.len = tlen; - elt.flags |= RSPAMD_STAT_TOKEN_FLAG_UNIGRAM; - g_array_append_val (ar, elt); - } - } - } - } - } - } - } - } - } - - lua_settop (L, 0); - st_ctx->tokenizer->tokenize_func (st_ctx, - task, - ar, - TRUE, - "META:", - task->tokens); - rspamd_mempool_add_destructor (task->task_pool, rspamd_array_free_hard, ar); } @@ -354,7 +236,7 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, if (pdiff != NULL && (1.0 - *pdiff) * 100.0 > similarity_treshold) { - msg_debug_task ("message has two common parts (%.2f), so skip the last one", + msg_debug_bayes ("message has two common parts (%.2f), so skip the last one", *pdiff); break; } @@ -425,7 +307,7 @@ rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx, if (!rspamd_symcache_is_symbol_enabled (task, task->cfg->cache, st->stcf->symbol)) { g_ptr_array_index (task->stat_runtimes, i) = NULL; - msg_debug_task ("symbol %s is disabled, skip classification", + msg_debug_bayes ("symbol %s is disabled, skip classification", st->stcf->symbol); continue; } @@ -574,7 +456,7 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx, if (bk_run == NULL) { skip = TRUE; - msg_debug_task ("disable classifier %s as statfile symbol %s is disabled", + msg_debug_bayes ("disable classifier %s as statfile symbol %s is disabled", cl->cfg->name, st->stcf->symbol); break; } @@ -583,7 +465,7 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx, if (!skip) { if (cl->cfg->min_tokens > 0 && task->tokens->len < cl->cfg->min_tokens) { - msg_debug_task ( + msg_debug_bayes ( "<%s> contains less tokens than required for %s classifier: " "%ud < %ud", task->message_id, @@ -593,7 +475,7 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx, continue; } else if (cl->cfg->max_tokens > 0 && task->tokens->len > cl->cfg->max_tokens) { - msg_debug_task ( + msg_debug_bayes ( "<%s> contains more tokens than allowed for %s classifier: " "%ud > %ud", task->message_id, @@ -1090,7 +972,7 @@ rspamd_stat_has_classifier_symbols (struct rspamd_task *task, if (rspamd_task_find_symbol_result (task, st->stcf->symbol)) { if (is_spam == !!st->stcf->is_spam) { - msg_debug_task ("do not autolearn %s as symbol %s is already " + msg_debug_bayes ("do not autolearn %s as symbol %s is already " "added", is_spam ? "spam" : "ham", st->stcf->symbol); return TRUE; |