diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-10-06 17:43:17 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-10-06 18:13:04 +0100 |
commit | 34e24051282cab18f3da7a55d247bf61448defda (patch) | |
tree | f564f8ac8ab1b43eb61d6c121818f06c0b57a8f8 | |
parent | 11332ebba77d12d62dcc13966ea24379c2ee94b4 (diff) | |
download | rspamd-34e24051282cab18f3da7a55d247bf61448defda.tar.gz rspamd-34e24051282cab18f3da7a55d247bf61448defda.zip |
[Feature] Add a generic lua classifier
-rw-r--r-- | src/libserver/cfg_file.h | 4 | ||||
-rw-r--r-- | src/libstat/CMakeLists.txt | 3 | ||||
-rw-r--r-- | src/libstat/classifiers/bayes.c | 4 | ||||
-rw-r--r-- | src/libstat/classifiers/classifiers.h | 18 | ||||
-rw-r--r-- | src/libstat/classifiers/lua_classifier.c | 46 | ||||
-rw-r--r-- | src/libstat/stat_config.c | 85 | ||||
-rw-r--r-- | src/libstat/stat_process.c | 10 |
7 files changed, 152 insertions, 18 deletions
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index 3bfeee98c..2f671135e 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -140,6 +140,10 @@ struct rspamd_tokenizer_config { * (e.g. redis) */ #define RSPAMD_FLAG_CLASSIFIER_INCREMENTING_BACKEND (1 << 1) +/* + * No backend required for classifier + */ +#define RSPAMD_FLAG_CLASSIFIER_NO_BACKEND (1 << 2) /** * Classifier config definition diff --git a/src/libstat/CMakeLists.txt b/src/libstat/CMakeLists.txt index 11f48bdc0..0bc920616 100644 --- a/src/libstat/CMakeLists.txt +++ b/src/libstat/CMakeLists.txt @@ -5,7 +5,8 @@ SET(LIBSTATSRC ${CMAKE_CURRENT_SOURCE_DIR}/stat_config.c SET(TOKENIZERSSRC ${CMAKE_CURRENT_SOURCE_DIR}/tokenizers/tokenizers.c ${CMAKE_CURRENT_SOURCE_DIR}/tokenizers/osb.c) -SET(CLASSIFIERSSRC ${CMAKE_CURRENT_SOURCE_DIR}/classifiers/bayes.c) +SET(CLASSIFIERSSRC ${CMAKE_CURRENT_SOURCE_DIR}/classifiers/bayes.c + ${CMAKE_CURRENT_SOURCE_DIR}/classifiers/lua_classifier.c) SET(BACKENDSSRC ${CMAKE_CURRENT_SOURCE_DIR}/backends/mmaped_file.c ${CMAKE_CURRENT_SOURCE_DIR}/backends/sqlite3_backend.c) diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index 5ebba8d56..40dcdf36f 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -185,10 +185,12 @@ bayes_normalize_prob (gdouble x) return a*x4 + b*x3 + c*x2 + d*xx; } -void +gboolean bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier *cl) { cl->cfg->flags |= RSPAMD_FLAG_CLASSIFIER_INTEGER; + + return TRUE; } gboolean diff --git a/src/libstat/classifiers/classifiers.h b/src/libstat/classifiers/classifiers.h index 6bafa8507..e30f2153a 100644 --- a/src/libstat/classifiers/classifiers.h +++ b/src/libstat/classifiers/classifiers.h @@ -16,7 +16,7 @@ struct token_node_s; struct rspamd_stat_classifier { char *name; - void (*init_func)(rspamd_mempool_t *pool, + gboolean (*init_func)(rspamd_mempool_t *pool, struct rspamd_classifier *cl); gboolean (*classify_func)(struct rspamd_classifier * ctx, GPtrArray *tokens, @@ -30,7 +30,7 @@ struct rspamd_stat_classifier { }; /* Bayes algorithm */ -void bayes_init (rspamd_mempool_t *pool, +gboolean bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier *); gboolean bayes_classify (struct rspamd_classifier *ctx, GPtrArray *tokens, @@ -42,6 +42,20 @@ gboolean bayes_learn_spam (struct rspamd_classifier *ctx, gboolean unlearn, GError **err); +/* Generic lua classifier */ +gboolean lua_classifier_init (rspamd_mempool_t *pool, + struct rspamd_classifier *); +gboolean lua_classifier_classify (struct rspamd_classifier *ctx, + GPtrArray *tokens, + struct rspamd_task *task); +gboolean lua_classifier_learn_spam (struct rspamd_classifier *ctx, + GPtrArray *tokens, + struct rspamd_task *task, + gboolean is_spam, + gboolean unlearn, + GError **err); + + #endif /* * vi:ts=4 diff --git a/src/libstat/classifiers/lua_classifier.c b/src/libstat/classifiers/lua_classifier.c new file mode 100644 index 000000000..dea0f6a24 --- /dev/null +++ b/src/libstat/classifiers/lua_classifier.c @@ -0,0 +1,46 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "classifiers.h" +#include "cfg_file.h" +#include "stat_internal.h" + +gboolean +lua_classifier_init (rspamd_mempool_t *pool, + struct rspamd_classifier *cl) +{ + cl->cfg->flags |= RSPAMD_FLAG_CLASSIFIER_NO_BACKEND; + + return TRUE; +} +gboolean +lua_classifier_classify (struct rspamd_classifier *ctx, + GPtrArray *tokens, + struct rspamd_task *task) +{ + return TRUE; +} + +gboolean +lua_classifier_learn_spam (struct rspamd_classifier *ctx, + GPtrArray *tokens, + struct rspamd_task *task, + gboolean is_spam, + gboolean unlearn, + GError **err) +{ + return TRUE; +} diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c index 3856fc117..48f572468 100644 --- a/src/libstat/stat_config.c +++ b/src/libstat/stat_config.c @@ -19,9 +19,17 @@ #include "rspamd.h" #include "cfg_rcl.h" #include "stat_internal.h" +#include "lua/lua_common.h" static struct rspamd_stat_ctx *stat_ctx = NULL; +static struct rspamd_stat_classifier lua_classifier = { + .name = "lua", + .init_func = lua_classifier_init, + .classify_func = lua_classifier_classify, + .learn_spam_func = lua_classifier_learn_spam, +}; + static struct rspamd_stat_classifier stat_classifiers[] = { { .name = "bayes", @@ -95,15 +103,55 @@ rspamd_stat_init (struct rspamd_config *cfg, struct event_base *ev_base) struct rspamd_classifier *cl; const ucl_object_t *cache_obj = NULL, *cache_name_obj; const gchar *cache_name = NULL; + lua_State *L = cfg->lua_state; + guint lua_classifiers_cnt = 0, i; if (stat_ctx == NULL) { stat_ctx = g_slice_alloc0 (sizeof (*stat_ctx)); } + lua_getglobal (L, "rspamd_classifiers"); + + if (lua_type (L, -1) == LUA_TTABLE) { + lua_pushnil (L); + + while (lua_next (L, -1) != 0) { + lua_classifiers_cnt ++; + lua_pop (L, 1); + } + } + + lua_pop (L, 1); + + stat_ctx->classifiers_count = G_N_ELEMENTS (stat_classifiers) + + lua_classifiers_cnt; + stat_ctx->classifiers_subrs = g_new0 (struct rspamd_stat_classifier, + stat_ctx->classifiers_count); + + for (i = 0; i < G_N_ELEMENTS (stat_classifiers); i ++) { + memcpy (&stat_ctx->classifiers_subrs[i], &stat_classifiers[i], + sizeof (struct rspamd_stat_classifier)); + } + + lua_getglobal (L, "rspamd_classifiers"); + + if (lua_type (L, -1) == LUA_TTABLE) { + lua_pushnil (L); + + while (lua_next (L, -1) != 0) { + lua_pushvalue (L, -2); + memcpy (&stat_ctx->classifiers_subrs[i], &lua_classifier, + sizeof (struct rspamd_stat_classifier)); + stat_ctx->classifiers_subrs[i].name = g_strdup (lua_tostring (L, -1)); + i ++; + lua_pop (L, 2); + } + } + + lua_pop (L, 1); stat_ctx->backends_subrs = stat_backends; stat_ctx->backends_count = G_N_ELEMENTS (stat_backends); - stat_ctx->classifiers_subrs = stat_classifiers; - stat_ctx->classifiers_count = G_N_ELEMENTS (stat_classifiers); + stat_ctx->tokenizers_subrs = stat_tokenizers; stat_ctx->tokenizers_count = G_N_ELEMENTS (stat_tokenizers); stat_ctx->caches_subrs = stat_caches; @@ -120,15 +168,32 @@ rspamd_stat_init (struct rspamd_config *cfg, struct event_base *ev_base) while (cur) { clf = cur->data; - bk = rspamd_stat_get_backend (clf->backend); + cl = g_slice_alloc0 (sizeof (*cl)); + cl->cfg = clf; + cl->ctx = stat_ctx; + cl->statfiles_ids = g_array_new (FALSE, FALSE, sizeof (gint)); + cl->subrs = rspamd_stat_get_classifier (clf->classifier); + g_assert (cl->subrs != NULL); - if (bk == NULL) { - msg_err_config ("cannot get backend of type %s, so disable classifier" - " %s completely", clf->backend, clf->name); + + if (!cl->subrs->init_func (cfg->cfg_pool, cl)) { + g_slice_free1 (sizeof (*cl), cl); + msg_err_config ("cannot init classifier type %s", clf->name); cur = g_list_next (cur); continue; } + if (!(clf->flags & RSPAMD_FLAG_CLASSIFIER_NO_BACKEND)) { + bk = rspamd_stat_get_backend (clf->backend); + + if (bk == NULL) { + msg_err_config ("cannot get backend of type %s, so disable classifier" + " %s completely", clf->backend, clf->name); + cur = g_list_next (cur); + continue; + } + } + /* XXX: * Here we get the first classifier tokenizer config as the only one * We NO LONGER support multiple tokenizers per rspamd instance @@ -140,14 +205,6 @@ rspamd_stat_init (struct rspamd_config *cfg, struct event_base *ev_base) clf->tokenizer, NULL); } - cl = g_slice_alloc0 (sizeof (*cl)); - cl->cfg = clf; - cl->ctx = stat_ctx; - cl->statfiles_ids = g_array_new (FALSE, FALSE, sizeof (gint)); - cl->subrs = rspamd_stat_get_classifier (clf->classifier); - g_assert (cl->subrs != NULL); - cl->subrs->init_func (cfg->cfg_pool, cl); - /* Init classifier cache */ cache_name = NULL; diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 6a1480ec5..228360fa6 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -667,6 +667,11 @@ rspamd_stat_backends_learn (struct rspamd_stat_ctx *st_ctx, continue; } + if (cl->cfg->flags & RSPAMD_FLAG_CLASSIFIER_NO_BACKEND) { + res = TRUE; + continue; + } + sel = cl; for (j = 0; j < cl->statfiles_ids->len; j ++) { @@ -759,6 +764,11 @@ rspamd_stat_backends_post_learn (struct rspamd_stat_ctx *st_ctx, cl->cache->learn (task, spam, cache_run); } + if (cl->cfg->flags & RSPAMD_FLAG_CLASSIFIER_NO_BACKEND) { + res = TRUE; + continue; + } + for (j = 0; j < cl->statfiles_ids->len; j ++) { id = g_array_index (cl->statfiles_ids, gint, j); st = g_ptr_array_index (st_ctx->statfiles, id); |