From 665166c376a54f52b070e891780ca6209bbaa2d1 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 17 Jan 2015 21:53:49 +0000 Subject: [PATCH] Start refactoring of statistics in rspamd. --- src/libserver/cfg_file.h | 47 +------------------ src/libserver/cfg_rcl.c | 28 +---------- src/libserver/cfg_utils.c | 6 --- src/libstat/CMakeLists.txt | 5 +- src/libstat/backends.h | 43 +++++++++++++++++ src/libstat/backends/backends.c | 47 +++++++++++++++++++ .../backends/mmaped_file.c} | 0 .../backends/mmaped_file.h} | 0 src/libstat/classifiers.h | 2 +- src/libstat/classifiers/classifiers.c | 2 +- src/libstat/stat_config.c | 30 ++++++++++++ src/libstat/tokenizers.h | 2 +- src/libstat/tokenizers/tokenizers.c | 5 +- src/lua/lua_classifier.c | 34 -------------- src/lua/lua_common.c | 22 +-------- 15 files changed, 134 insertions(+), 139 deletions(-) create mode 100644 src/libstat/backends.h create mode 100644 src/libstat/backends/backends.c rename src/{libserver/statfile.c => libstat/backends/mmaped_file.c} (100%) rename src/{libserver/statfile.h => libstat/backends/mmaped_file.h} (100%) create mode 100644 src/libstat/stat_config.c diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index 5844a945f..1beb51055 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -125,43 +125,6 @@ struct rspamd_symbols_group { GList *symbols; }; -/** - * Statfile section definition - */ -struct rspamd_statfile_section { - guint32 code; /**< section's code */ - guint64 size; /**< size of section */ - double weight; /**< weight coefficient for section */ -}; - -/** - * Statfile autolearn parameters - */ -struct statfile_autolearn_params { - const gchar *metric; /**< metric name for autolearn triggering */ - double threshold_min; /**< threshold mark */ - double threshold_max; /**< threshold mark */ - GList *symbols; /**< list of symbols */ -}; - -/** - * Sync affinity - */ -enum sync_affinity { - AFFINITY_NONE = 0, - AFFINITY_MASTER, - AFFINITY_SLAVE -}; - -/** - * Binlog params - */ -struct statfile_binlog_params { - enum sync_affinity affinity; - time_t rotate_time; - gchar *master_addr; - guint16 master_port; -}; typedef double (*statfile_normalize_func)(struct rspamd_config *cfg, long double score, void *params); @@ -171,15 +134,7 @@ typedef double (*statfile_normalize_func)(struct rspamd_config *cfg, */ struct rspamd_statfile_config { gchar *symbol; /**< symbol of statfile */ - gchar *path; /**< filesystem pattern (with %r or %f) */ gchar *label; /**< label of this statfile */ - gsize size; /**< size of statfile */ - GList *sections; /**< list of sections in statfile */ - struct statfile_autolearn_params *autolearn; /**< autolearn params */ - struct statfile_binlog_params *binlog; /**< binlog params */ - statfile_normalize_func normalizer; /**< function that is used as normaliser */ - void *normalizer_data; /**< normalizer function params */ - gchar *normalizer_str; /**< source string (for dump) */ ucl_object_t *opts; /**< other options */ gboolean is_spam; /**< spam flag */ }; @@ -193,7 +148,7 @@ struct rspamd_classifier_config { gchar *metric; /**< metric of this classifier */ struct classifier *classifier; /**< classifier interface */ struct tokenizer *tokenizer; /**< tokenizer used for classifier */ - GHashTable *opts; /**< other options */ + ucl_object_t *opts; /**< other options */ GList *pre_callbacks; /**< list of callbacks that are called before classification */ GList *post_callbacks; /**< list of callbacks that are called after classification */ }; diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index 6c77292aa..921464219 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -889,14 +889,6 @@ rspamd_rcl_statfile_handler (struct rspamd_config *cfg, const ucl_object_t *obj, return FALSE; } - if (st->path == NULL) { - g_set_error (err, - CFG_RCL_ERROR, - EINVAL, - "statfile must have a path defined"); - return FALSE; - } - st->opts = (ucl_object_t *)obj; val = ucl_object_find_key (obj, "spam"); @@ -967,7 +959,7 @@ rspamd_rcl_classifier_handler (struct rspamd_config *cfg, if (found == NULL) { ccf = rspamd_config_new_classifier (cfg, NULL); - ccf->classifier = get_classifier (type); + ccf->classifier = rspamd_stat_get_classifier (type); } else { ccf = found; @@ -997,13 +989,7 @@ rspamd_rcl_classifier_handler (struct rspamd_config *cfg, } else if (g_ascii_strcasecmp (key, "tokenizer") == 0 && val->type == UCL_STRING) { - ccf->tokenizer = get_tokenizer (ucl_object_tostring (val)); - } - else { - /* Just insert a value of option to the hash */ - g_hash_table_insert (ccf->opts, - (gpointer)key, - (gpointer)ucl_object_tostring_forced (val)); + ccf->tokenizer = rspamd_stat_get_tokenizer (ucl_object_tostring (val)); } } } @@ -1403,21 +1389,11 @@ rspamd_rcl_config_init (void) rspamd_rcl_parse_struct_string, G_STRUCT_OFFSET (struct rspamd_statfile_config, symbol), 0); - rspamd_rcl_add_default_handler (ssub, - "path", - rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct rspamd_statfile_config, path), - RSPAMD_CL_FLAG_STRING_PATH); rspamd_rcl_add_default_handler (ssub, "label", rspamd_rcl_parse_struct_string, G_STRUCT_OFFSET (struct rspamd_statfile_config, label), 0); - rspamd_rcl_add_default_handler (ssub, - "size", - rspamd_rcl_parse_struct_integer, - G_STRUCT_OFFSET (struct rspamd_statfile_config, size), - RSPAMD_CL_FLAG_INT_SIZE); rspamd_rcl_add_default_handler (ssub, "spam", rspamd_rcl_parse_struct_boolean, diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c index b53a2690c..c9a9555b1 100644 --- a/src/libserver/cfg_utils.c +++ b/src/libserver/cfg_utils.c @@ -498,12 +498,6 @@ rspamd_config_new_classifier (struct rspamd_config *cfg, rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_classifier_config)); } - if (c->opts == NULL) { - c->opts = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - rspamd_mempool_add_destructor (cfg->cfg_pool, - (rspamd_mempool_destruct_t) g_hash_table_destroy, - c->opts); - } if (c->labels == NULL) { c->labels = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, diff --git a/src/libstat/CMakeLists.txt b/src/libstat/CMakeLists.txt index 810570f20..f1692de63 100644 --- a/src/libstat/CMakeLists.txt +++ b/src/libstat/CMakeLists.txt @@ -1,11 +1,14 @@ # Librspamdserver SET(LIBSTATSRC - ) + stat_config.c) SET(TOKENIZERSSRC tokenizers/tokenizers.c tokenizers/osb.c) SET(CLASSIFIERSSRC classifiers/classifiers.c classifiers/bayes.c) + +SET(BACKENDSSRC backends/backends.c + backends/mmaped_file.c) ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC} ${TOKENIZERSSRC} ${CLASSIFIERSSRC}) IF(NOT DEBIAN_BUILD) diff --git a/src/libstat/backends.h b/src/libstat/backends.h new file mode 100644 index 000000000..04710b4b2 --- /dev/null +++ b/src/libstat/backends.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef BACKENDS_H_ +#define BACKENDS_H_ + +#include "config.h" +#include "cfg_file.h" + +#define RSPAMD_DEFAULT_BACKEND "mmap" + +struct rspamd_stat_backend { + const char *name; + gpointer (*init)(rspamd_mempool_t *pool, struct rspamd_statfile_config *cfg); + gpointer ctx; +}; + +extern struct rspamd_stat_backend statfile_backends[]; + +struct rspamd_stat_backend *rspamd_stat_get_backend (const char *name); + +#endif /* BACKENDS_H_ */ diff --git a/src/libstat/backends/backends.c b/src/libstat/backends/backends.c new file mode 100644 index 000000000..815a66dbd --- /dev/null +++ b/src/libstat/backends/backends.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "main.h" +#include "backends.h" +#include "mmaped_file.h" + +struct rspamd_stat_backend statfile_backends[] = { + {RSPAMD_DEFAULT_BACKEND, } +}; + + +struct rspamd_stat_backend * +rspamd_stat_get_backend (const char *name) +{ + guint i; + + for (i = 0; i < G_N_ELEMENTS (statfile_backends); i++) { + if (strcmp (statfile_backends[i].name, name) == 0) { + return &statfile_backends[i]; + } + } + + return NULL; +} diff --git a/src/libserver/statfile.c b/src/libstat/backends/mmaped_file.c similarity index 100% rename from src/libserver/statfile.c rename to src/libstat/backends/mmaped_file.c diff --git a/src/libserver/statfile.h b/src/libstat/backends/mmaped_file.h similarity index 100% rename from src/libserver/statfile.h rename to src/libstat/backends/mmaped_file.h diff --git a/src/libstat/classifiers.h b/src/libstat/classifiers.h index d13178486..2c2f33449 100644 --- a/src/libstat/classifiers.h +++ b/src/libstat/classifiers.h @@ -45,7 +45,7 @@ struct classifier { }; /* Get classifier structure by name or return NULL if this name is not found */ -struct classifier * get_classifier (const char *name); +struct classifier * rspamd_stat_get_classifier (const char *name); /* Bayes algorithm */ struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, diff --git a/src/libstat/classifiers/classifiers.c b/src/libstat/classifiers/classifiers.c index 6af7d2dc8..a3efb53c1 100644 --- a/src/libstat/classifiers/classifiers.c +++ b/src/libstat/classifiers/classifiers.c @@ -40,7 +40,7 @@ struct classifier classifiers[] = { }; struct classifier * -get_classifier (const char *name) +rspamd_stat_get_classifier (const char *name) { guint i; diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c new file mode 100644 index 000000000..fd2c0f165 --- /dev/null +++ b/src/libstat/stat_config.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "stat_api.h" +#include "main.h" +#include "cfg_rcl.h" + diff --git a/src/libstat/tokenizers.h b/src/libstat/tokenizers.h index ed47e0add..c0d2e8934 100644 --- a/src/libstat/tokenizers.h +++ b/src/libstat/tokenizers.h @@ -33,7 +33,7 @@ struct tokenizer { int token_node_compare_func (gconstpointer a, gconstpointer b); /* Get tokenizer structure by name or return NULL if this name is not found */ -struct tokenizer * get_tokenizer (const char *name); +struct tokenizer * rspamd_stat_get_tokenizer (const char *name); /* Get next word from specified f_str_t buf */ gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf, diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index 3e6c745ec..ce221397d 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -26,7 +26,6 @@ * Common tokenization functions */ -#include #include "main.h" #include "tokenizers.h" @@ -77,7 +76,7 @@ const gchar t_delimiters[255] = { }; struct tokenizer * -get_tokenizer (const char *name) +rspamd_stat_get_tokenizer (const char *name) { guint i; @@ -230,7 +229,7 @@ tokenize_subject (struct rspamd_task *task, GTree ** tree) (rspamd_mempool_destruct_t) g_tree_destroy, *tree); } - osb_tokenizer = get_tokenizer ("osb-text"); + osb_tokenizer = rspamd_stat_get_tokenizer ("osb-text"); /* Try to use pre-defined subject */ if (task->subject != NULL) { diff --git a/src/lua/lua_classifier.c b/src/lua/lua_classifier.c index 346f5d64b..7adc473ba 100644 --- a/src/lua/lua_classifier.c +++ b/src/lua/lua_classifier.c @@ -45,16 +45,12 @@ static const struct luaL_reg classifierlib_m[] = { LUA_FUNCTION_DEF (statfile, get_symbol); LUA_FUNCTION_DEF (statfile, get_label); -LUA_FUNCTION_DEF (statfile, get_path); -LUA_FUNCTION_DEF (statfile, get_size); LUA_FUNCTION_DEF (statfile, is_spam); LUA_FUNCTION_DEF (statfile, get_param); static const struct luaL_reg statfilelib_m[] = { LUA_INTERFACE_DEF (statfile, get_symbol), LUA_INTERFACE_DEF (statfile, get_label), - LUA_INTERFACE_DEF (statfile, get_path), - LUA_INTERFACE_DEF (statfile, get_size), LUA_INTERFACE_DEF (statfile, is_spam), LUA_INTERFACE_DEF (statfile, get_param), {"__tostring", rspamd_lua_class_tostring}, @@ -351,36 +347,6 @@ lua_statfile_get_label (lua_State *L) return 1; } -static gint -lua_statfile_get_path (lua_State *L) -{ - struct rspamd_statfile_config *st = lua_check_statfile (L); - - if (st != NULL) { - lua_pushstring (L, st->path); - } - else { - lua_pushnil (L); - } - - return 1; -} - -static gint -lua_statfile_get_size (lua_State *L) -{ - struct rspamd_statfile_config *st = lua_check_statfile (L); - - if (st != NULL) { - lua_pushinteger (L, st->size); - } - else { - lua_pushnil (L); - } - - return 1; -} - static gint lua_statfile_is_spam (lua_State *L) { diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index 8df878585..73471719b 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -355,9 +355,8 @@ gboolean rspamd_init_lua_filters (struct rspamd_config *cfg) { struct rspamd_config **pcfg; - GList *cur, *tmp; + GList *cur; struct script_module *module; - struct rspamd_statfile_config *st; lua_State *L = cfg->lua_state; cur = g_list_first (cfg->script_modules); @@ -395,24 +394,7 @@ rspamd_init_lua_filters (struct rspamd_config *cfg) } cur = g_list_next (cur); } - /* Init statfiles normalizers */ - cur = g_list_first (cfg->statfiles); - while (cur) { - st = cur->data; - if (st->normalizer == rspamd_lua_normalize) { - tmp = st->normalizer_data; - if (tmp && (tmp = g_list_next (tmp))) { - if (tmp->data) { - /* Code must be loaded from data */ - if (luaL_loadstring (L, tmp->data) != 0) { - msg_info ("cannot load normalizer code %s", tmp->data); - return FALSE; - } - } - } - } - cur = g_list_next (cur); - } + /* Assign state */ cfg->lua_state = L; -- 2.39.5