aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-01-17 21:53:49 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-01-18 23:05:15 +0000
commit665166c376a54f52b070e891780ca6209bbaa2d1 (patch)
tree034b58919947b10b1c5adb85796bc8d1a8ea0ed7
parent5d9fc64987e526b3a3cccd3dcb5f980ccc8b83cb (diff)
downloadrspamd-665166c376a54f52b070e891780ca6209bbaa2d1.tar.gz
rspamd-665166c376a54f52b070e891780ca6209bbaa2d1.zip
Start refactoring of statistics in rspamd.
-rw-r--r--src/libserver/cfg_file.h47
-rw-r--r--src/libserver/cfg_rcl.c28
-rw-r--r--src/libserver/cfg_utils.c6
-rw-r--r--src/libstat/CMakeLists.txt5
-rw-r--r--src/libstat/backends.h43
-rw-r--r--src/libstat/backends/backends.c47
-rw-r--r--src/libstat/backends/mmaped_file.c (renamed from src/libserver/statfile.c)0
-rw-r--r--src/libstat/backends/mmaped_file.h (renamed from src/libserver/statfile.h)0
-rw-r--r--src/libstat/classifiers.h2
-rw-r--r--src/libstat/classifiers/classifiers.c2
-rw-r--r--src/libstat/stat_config.c30
-rw-r--r--src/libstat/tokenizers.h2
-rw-r--r--src/libstat/tokenizers/tokenizers.c5
-rw-r--r--src/lua/lua_classifier.c34
-rw-r--r--src/lua/lua_common.c22
15 files changed, 134 insertions, 139 deletions
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 5844a945f..1beb51055 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -125,43 +125,6 @@ struct rspamd_symbols_group {
GList *symbols;
};
-/**
- * Statfile section definition
- */
-struct rspamd_statfile_section {
- guint32 code; /**< section's code */
- guint64 size; /**< size of section */
- double weight; /**< weight coefficient for section */
-};
-
-/**
- * Statfile autolearn parameters
- */
-struct statfile_autolearn_params {
- const gchar *metric; /**< metric name for autolearn triggering */
- double threshold_min; /**< threshold mark */
- double threshold_max; /**< threshold mark */
- GList *symbols; /**< list of symbols */
-};
-
-/**
- * Sync affinity
- */
-enum sync_affinity {
- AFFINITY_NONE = 0,
- AFFINITY_MASTER,
- AFFINITY_SLAVE
-};
-
-/**
- * Binlog params
- */
-struct statfile_binlog_params {
- enum sync_affinity affinity;
- time_t rotate_time;
- gchar *master_addr;
- guint16 master_port;
-};
typedef double (*statfile_normalize_func)(struct rspamd_config *cfg,
long double score, void *params);
@@ -171,15 +134,7 @@ typedef double (*statfile_normalize_func)(struct rspamd_config *cfg,
*/
struct rspamd_statfile_config {
gchar *symbol; /**< symbol of statfile */
- gchar *path; /**< filesystem pattern (with %r or %f) */
gchar *label; /**< label of this statfile */
- gsize size; /**< size of statfile */
- GList *sections; /**< list of sections in statfile */
- struct statfile_autolearn_params *autolearn; /**< autolearn params */
- struct statfile_binlog_params *binlog; /**< binlog params */
- statfile_normalize_func normalizer; /**< function that is used as normaliser */
- void *normalizer_data; /**< normalizer function params */
- gchar *normalizer_str; /**< source string (for dump) */
ucl_object_t *opts; /**< other options */
gboolean is_spam; /**< spam flag */
};
@@ -193,7 +148,7 @@ struct rspamd_classifier_config {
gchar *metric; /**< metric of this classifier */
struct classifier *classifier; /**< classifier interface */
struct tokenizer *tokenizer; /**< tokenizer used for classifier */
- GHashTable *opts; /**< other options */
+ ucl_object_t *opts; /**< other options */
GList *pre_callbacks; /**< list of callbacks that are called before classification */
GList *post_callbacks; /**< list of callbacks that are called after classification */
};
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index 6c77292aa..921464219 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -889,14 +889,6 @@ rspamd_rcl_statfile_handler (struct rspamd_config *cfg, const ucl_object_t *obj,
return FALSE;
}
- if (st->path == NULL) {
- g_set_error (err,
- CFG_RCL_ERROR,
- EINVAL,
- "statfile must have a path defined");
- return FALSE;
- }
-
st->opts = (ucl_object_t *)obj;
val = ucl_object_find_key (obj, "spam");
@@ -967,7 +959,7 @@ rspamd_rcl_classifier_handler (struct rspamd_config *cfg,
if (found == NULL) {
ccf = rspamd_config_new_classifier (cfg, NULL);
- ccf->classifier = get_classifier (type);
+ ccf->classifier = rspamd_stat_get_classifier (type);
}
else {
ccf = found;
@@ -997,13 +989,7 @@ rspamd_rcl_classifier_handler (struct rspamd_config *cfg,
}
else if (g_ascii_strcasecmp (key,
"tokenizer") == 0 && val->type == UCL_STRING) {
- ccf->tokenizer = get_tokenizer (ucl_object_tostring (val));
- }
- else {
- /* Just insert a value of option to the hash */
- g_hash_table_insert (ccf->opts,
- (gpointer)key,
- (gpointer)ucl_object_tostring_forced (val));
+ ccf->tokenizer = rspamd_stat_get_tokenizer (ucl_object_tostring (val));
}
}
}
@@ -1404,21 +1390,11 @@ rspamd_rcl_config_init (void)
G_STRUCT_OFFSET (struct rspamd_statfile_config, symbol),
0);
rspamd_rcl_add_default_handler (ssub,
- "path",
- rspamd_rcl_parse_struct_string,
- G_STRUCT_OFFSET (struct rspamd_statfile_config, path),
- RSPAMD_CL_FLAG_STRING_PATH);
- rspamd_rcl_add_default_handler (ssub,
"label",
rspamd_rcl_parse_struct_string,
G_STRUCT_OFFSET (struct rspamd_statfile_config, label),
0);
rspamd_rcl_add_default_handler (ssub,
- "size",
- rspamd_rcl_parse_struct_integer,
- G_STRUCT_OFFSET (struct rspamd_statfile_config, size),
- RSPAMD_CL_FLAG_INT_SIZE);
- rspamd_rcl_add_default_handler (ssub,
"spam",
rspamd_rcl_parse_struct_boolean,
G_STRUCT_OFFSET (struct rspamd_statfile_config, is_spam),
diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c
index b53a2690c..c9a9555b1 100644
--- a/src/libserver/cfg_utils.c
+++ b/src/libserver/cfg_utils.c
@@ -498,12 +498,6 @@ rspamd_config_new_classifier (struct rspamd_config *cfg,
rspamd_mempool_alloc0 (cfg->cfg_pool,
sizeof (struct rspamd_classifier_config));
}
- if (c->opts == NULL) {
- c->opts = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
- rspamd_mempool_add_destructor (cfg->cfg_pool,
- (rspamd_mempool_destruct_t) g_hash_table_destroy,
- c->opts);
- }
if (c->labels == NULL) {
c->labels = g_hash_table_new_full (rspamd_str_hash,
rspamd_str_equal,
diff --git a/src/libstat/CMakeLists.txt b/src/libstat/CMakeLists.txt
index 810570f20..f1692de63 100644
--- a/src/libstat/CMakeLists.txt
+++ b/src/libstat/CMakeLists.txt
@@ -1,11 +1,14 @@
# Librspamdserver
SET(LIBSTATSRC
- )
+ stat_config.c)
SET(TOKENIZERSSRC tokenizers/tokenizers.c
tokenizers/osb.c)
SET(CLASSIFIERSSRC classifiers/classifiers.c
classifiers/bayes.c)
+
+SET(BACKENDSSRC backends/backends.c # sources of the statistics backends
+ backends/mmaped_file.c) # NOTE(review): BACKENDSSRC is not listed in the ADD_LIBRARY call visible in this hunk - confirm whether it should be
ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC} ${TOKENIZERSSRC} ${CLASSIFIERSSRC})
IF(NOT DEBIAN_BUILD)
diff --git a/src/libstat/backends.h b/src/libstat/backends.h
new file mode 100644
index 000000000..04710b4b2
--- /dev/null
+++ b/src/libstat/backends.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef BACKENDS_H_
+#define BACKENDS_H_
+
+#include "config.h"
+#include "cfg_file.h"
+
+#define RSPAMD_DEFAULT_BACKEND "mmap"
+
+struct rspamd_stat_backend { /* Descriptor of a statistics backend, looked up by name */
+ const char *name; /**< backend name compared by rspamd_stat_get_backend() */
+ gpointer (*init)(rspamd_mempool_t *pool, struct rspamd_statfile_config *cfg); /**< init callback; receives a memory pool and a statfile config (meaning of the result is not shown here - TODO confirm) */
+ gpointer ctx; /**< opaque backend data; presumably filled from init - TODO confirm */
+};
+
+extern struct rspamd_stat_backend statfile_backends[]; /* table of known backends, defined in backends/backends.c */
+
+struct rspamd_stat_backend *rspamd_stat_get_backend (const char *name); /* Get backend structure by name or return NULL if this name is not found */
+
+#endif /* BACKENDS_H_ */
diff --git a/src/libstat/backends/backends.c b/src/libstat/backends/backends.c
new file mode 100644
index 000000000..815a66dbd
--- /dev/null
+++ b/src/libstat/backends/backends.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "main.h"
+#include "backends.h"
+#include "mmaped_file.h"
+
+/* Table of known statistics backends; members not named here stay zeroed */
+struct rspamd_stat_backend statfile_backends[] = {
+	{ .name = RSPAMD_DEFAULT_BACKEND }
+};
+
+
+/*
+ * Get backend structure by name or return NULL if this name is not found.
+ *
+ * A NULL or empty name selects RSPAMD_DEFAULT_BACKEND: passing NULL to
+ * strcmp is undefined behaviour, and a caller without an explicitly
+ * configured backend should get the default one.
+ *
+ * The returned pointer refers to an element of the static statfile_backends
+ * table and must not be freed by the caller.
+ */
+struct rspamd_stat_backend *
+rspamd_stat_get_backend (const char *name)
+{
+	guint i;
+
+	if (name == NULL || name[0] == '\0') {
+		name = RSPAMD_DEFAULT_BACKEND;
+	}
+
+	for (i = 0; i < G_N_ELEMENTS (statfile_backends); i++) {
+		if (strcmp (statfile_backends[i].name, name) == 0) {
+			return &statfile_backends[i];
+		}
+	}
+
+	return NULL;
+}
diff --git a/src/libserver/statfile.c b/src/libstat/backends/mmaped_file.c
index 066671a95..066671a95 100644
--- a/src/libserver/statfile.c
+++ b/src/libstat/backends/mmaped_file.c
diff --git a/src/libserver/statfile.h b/src/libstat/backends/mmaped_file.h
index f7f632703..f7f632703 100644
--- a/src/libserver/statfile.h
+++ b/src/libstat/backends/mmaped_file.h
diff --git a/src/libstat/classifiers.h b/src/libstat/classifiers.h
index d13178486..2c2f33449 100644
--- a/src/libstat/classifiers.h
+++ b/src/libstat/classifiers.h
@@ -45,7 +45,7 @@ struct classifier {
};
/* Get classifier structure by name or return NULL if this name is not found */
-struct classifier * get_classifier (const char *name);
+struct classifier * rspamd_stat_get_classifier (const char *name);
/* Bayes algorithm */
struct classifier_ctx * bayes_init (rspamd_mempool_t *pool,
diff --git a/src/libstat/classifiers/classifiers.c b/src/libstat/classifiers/classifiers.c
index 6af7d2dc8..a3efb53c1 100644
--- a/src/libstat/classifiers/classifiers.c
+++ b/src/libstat/classifiers/classifiers.c
@@ -40,7 +40,7 @@ struct classifier classifiers[] = {
};
struct classifier *
-get_classifier (const char *name)
+rspamd_stat_get_classifier (const char *name)
{
guint i;
diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c
new file mode 100644
index 000000000..fd2c0f165
--- /dev/null
+++ b/src/libstat/stat_config.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "stat_api.h"
+#include "main.h"
+#include "cfg_rcl.h"
+
diff --git a/src/libstat/tokenizers.h b/src/libstat/tokenizers.h
index ed47e0add..c0d2e8934 100644
--- a/src/libstat/tokenizers.h
+++ b/src/libstat/tokenizers.h
@@ -33,7 +33,7 @@ struct tokenizer {
int token_node_compare_func (gconstpointer a, gconstpointer b);
/* Get tokenizer structure by name or return NULL if this name is not found */
-struct tokenizer * get_tokenizer (const char *name);
+struct tokenizer * rspamd_stat_get_tokenizer (const char *name);
/* Get next word from specified f_str_t buf */
gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf,
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index 3e6c745ec..ce221397d 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -26,7 +26,6 @@
* Common tokenization functions
*/
-#include <sys/types.h>
#include "main.h"
#include "tokenizers.h"
@@ -77,7 +76,7 @@ const gchar t_delimiters[255] = {
};
struct tokenizer *
-get_tokenizer (const char *name)
+rspamd_stat_get_tokenizer (const char *name)
{
guint i;
@@ -230,7 +229,7 @@ tokenize_subject (struct rspamd_task *task, GTree ** tree)
(rspamd_mempool_destruct_t) g_tree_destroy, *tree);
}
- osb_tokenizer = get_tokenizer ("osb-text");
+ osb_tokenizer = rspamd_stat_get_tokenizer ("osb-text");
/* Try to use pre-defined subject */
if (task->subject != NULL) {
diff --git a/src/lua/lua_classifier.c b/src/lua/lua_classifier.c
index 346f5d64b..7adc473ba 100644
--- a/src/lua/lua_classifier.c
+++ b/src/lua/lua_classifier.c
@@ -45,16 +45,12 @@ static const struct luaL_reg classifierlib_m[] = {
LUA_FUNCTION_DEF (statfile, get_symbol);
LUA_FUNCTION_DEF (statfile, get_label);
-LUA_FUNCTION_DEF (statfile, get_path);
-LUA_FUNCTION_DEF (statfile, get_size);
LUA_FUNCTION_DEF (statfile, is_spam);
LUA_FUNCTION_DEF (statfile, get_param);
static const struct luaL_reg statfilelib_m[] = {
LUA_INTERFACE_DEF (statfile, get_symbol),
LUA_INTERFACE_DEF (statfile, get_label),
- LUA_INTERFACE_DEF (statfile, get_path),
- LUA_INTERFACE_DEF (statfile, get_size),
LUA_INTERFACE_DEF (statfile, is_spam),
LUA_INTERFACE_DEF (statfile, get_param),
{"__tostring", rspamd_lua_class_tostring},
@@ -352,36 +348,6 @@ lua_statfile_get_label (lua_State *L)
}
static gint
-lua_statfile_get_path (lua_State *L)
-{
- struct rspamd_statfile_config *st = lua_check_statfile (L);
-
- if (st != NULL) {
- lua_pushstring (L, st->path);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
-}
-
-static gint
-lua_statfile_get_size (lua_State *L)
-{
- struct rspamd_statfile_config *st = lua_check_statfile (L);
-
- if (st != NULL) {
- lua_pushinteger (L, st->size);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
-}
-
-static gint
lua_statfile_is_spam (lua_State *L)
{
struct rspamd_statfile_config *st = lua_check_statfile (L);
diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c
index 8df878585..73471719b 100644
--- a/src/lua/lua_common.c
+++ b/src/lua/lua_common.c
@@ -355,9 +355,8 @@ gboolean
rspamd_init_lua_filters (struct rspamd_config *cfg)
{
struct rspamd_config **pcfg;
- GList *cur, *tmp;
+ GList *cur;
struct script_module *module;
- struct rspamd_statfile_config *st;
lua_State *L = cfg->lua_state;
cur = g_list_first (cfg->script_modules);
@@ -395,24 +394,7 @@ rspamd_init_lua_filters (struct rspamd_config *cfg)
}
cur = g_list_next (cur);
}
- /* Init statfiles normalizers */
- cur = g_list_first (cfg->statfiles);
- while (cur) {
- st = cur->data;
- if (st->normalizer == rspamd_lua_normalize) {
- tmp = st->normalizer_data;
- if (tmp && (tmp = g_list_next (tmp))) {
- if (tmp->data) {
- /* Code must be loaded from data */
- if (luaL_loadstring (L, tmp->data) != 0) {
- msg_info ("cannot load normalizer code %s", tmp->data);
- return FALSE;
- }
- }
- }
- }
- cur = g_list_next (cur);
- }
+
/* Assign state */
cfg->lua_state = L;