From 8a05515078bc8fd3d642778fcae0d005a38ec7b0 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 23 Jan 2015 13:50:17 +0000 Subject: [PATCH] Reorganize libstat API. --- src/libstat/CMakeLists.txt | 9 +-- src/libstat/backends/backends.c | 50 --------------- src/libstat/{ => backends}/backends.h | 10 +-- src/libstat/backends/mmaped_file.c | 1 - src/libstat/backends/mmaped_file.h | 20 ------ src/libstat/classifiers/classifiers.c | 56 ----------------- src/libstat/{ => classifiers}/classifiers.h | 15 +---- src/libstat/stat_api.h | 29 --------- src/libstat/stat_config.c | 30 +++++++++ src/libstat/stat_internal.h | 70 +++++++++++++++++++++ src/libstat/tokenizers/tokenizers.c | 18 ------ src/libstat/{ => tokenizers}/tokenizers.h | 6 -- 12 files changed, 110 insertions(+), 204 deletions(-) delete mode 100644 src/libstat/backends/backends.c rename src/libstat/{ => backends}/backends.h (89%) delete mode 100644 src/libstat/backends/mmaped_file.h delete mode 100644 src/libstat/classifiers/classifiers.c rename src/libstat/{ => classifiers}/classifiers.h (76%) create mode 100644 src/libstat/stat_internal.h rename src/libstat/{ => tokenizers}/tokenizers.h (84%) diff --git a/src/libstat/CMakeLists.txt b/src/libstat/CMakeLists.txt index 4c02c49c6..97725d2f9 100644 --- a/src/libstat/CMakeLists.txt +++ b/src/libstat/CMakeLists.txt @@ -1,14 +1,11 @@ # Librspamdserver -SET(LIBSTATSRC - stat_config.c) +SET(LIBSTATSRC stat_config.c) SET(TOKENIZERSSRC tokenizers/tokenizers.c tokenizers/osb.c) -SET(CLASSIFIERSSRC classifiers/classifiers.c - classifiers/bayes.c) +SET(CLASSIFIERSSRC classifiers/bayes.c) -SET(BACKENDSSRC backends/backends.c - backends/mmaped_file.c) +SET(BACKENDSSRC backends/mmaped_file.c) ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC} ${TOKENIZERSSRC} diff --git a/src/libstat/backends/backends.c b/src/libstat/backends/backends.c deleted file mode 100644 index 0701a2ff9..000000000 --- a/src/libstat/backends/backends.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2015, Vsevolod Stakhov - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "main.h" -#include "backends.h" -#include "mmaped_file.h" - -struct rspamd_stat_backend statfile_backends[] = { - { - .name = RSPAMD_DEFAULT_BACKEND, - .init = rspamd_mmaped_file_init, - } -}; - - -struct rspamd_stat_backend * -rspamd_stat_get_backend (const char *name) -{ - guint i; - - for (i = 0; i < G_N_ELEMENTS (statfile_backends); i++) { - if (strcmp (statfile_backends[i].name, name) == 0) { - return &statfile_backends[i]; - } - } - - return NULL; -} diff --git a/src/libstat/backends.h b/src/libstat/backends/backends.h similarity index 89% rename from src/libstat/backends.h rename to src/libstat/backends/backends.h index ecd41173d..37a7560cd 100644 --- a/src/libstat/backends.h +++ b/src/libstat/backends/backends.h @@ -26,18 +26,20 @@ #define BACKENDS_H_ #include "config.h" -#include "cfg_file.h" #define RSPAMD_DEFAULT_BACKEND "mmap" +/* Forwarded declarations */ +struct rspamd_classifier_config; +struct rspamd_statfile_config; +struct rspamd_config; + struct rspamd_stat_backend { const char *name; gpointer (*init)(struct rspamd_statfile_config *cfg); gpointer ctx; }; -extern struct rspamd_stat_backend statfile_backends[]; - -struct rspamd_stat_backend *rspamd_stat_get_backend (const char *name); +gpointer rspamd_mmaped_file_init(struct rspamd_config *cfg); #endif /* BACKENDS_H_ */ diff --git a/src/libstat/backends/mmaped_file.c b/src/libstat/backends/mmaped_file.c index 9ac725fe7..cef17f465 100644 --- a/src/libstat/backends/mmaped_file.c +++ b/src/libstat/backends/mmaped_file.c @@ -24,7 +24,6 @@ #include "config.h" -#include "mmaped_file.h" #include "main.h" #define CHAIN_LENGTH 128 diff --git a/src/libstat/backends/mmaped_file.h b/src/libstat/backends/mmaped_file.h deleted file mode 100644 index f3f25c8cb..000000000 --- a/src/libstat/backends/mmaped_file.h +++ /dev/null @@ -1,20 +0,0 @@ -/** - * @file statfile.h - * Describes common methods for accessing statistics files and caching them in memory - */ - -#ifndef RSPAMD_STATFILE_H -#define RSPAMD_STATFILE_H - -#include "config.h" - - -/* Forwarded declarations */ -struct rspamd_classifier_config; -struct rspamd_statfile_config; -struct rspamd_config; - -gpointer -rspamd_mmaped_file_init(struct rspamd_config *cfg); - -#endif diff --git a/src/libstat/classifiers/classifiers.c b/src/libstat/classifiers/classifiers.c deleted file mode 100644 index 4d78f1f81..000000000 --- a/src/libstat/classifiers/classifiers.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Common classifier functions - */ - -#include "classifiers.h" - -struct classifier classifiers[] = { - { - .name = "bayes", - .init_func = bayes_init, - .classify_func = bayes_classify, - .learn_spam_func = bayes_learn_spam, - } -}; - -struct classifier * -rspamd_stat_get_classifier (const char *name) -{ - guint i; - - for (i = 0; i < sizeof (classifiers) / sizeof (classifiers[0]); i++) { - if (strcmp (classifiers[i].name, name) == 0) { - return &classifiers[i]; - } - } - - return NULL; -} - -/* - * vi:ts=4 - */ diff --git a/src/libstat/classifiers.h b/src/libstat/classifiers/classifiers.h similarity index 76% rename from src/libstat/classifiers.h rename to src/libstat/classifiers/classifiers.h index 8c05bc6db..4ae1ba23d 100644 --- a/src/libstat/classifiers.h +++ b/src/libstat/classifiers/classifiers.h @@ -2,9 +2,6 @@ #define CLASSIFIERS_H #include "config.h" -#include "mem_pool.h" -#include "tokenizers.h" -#include /* Consider this value as 0 */ #define ALPHA 0.0001 @@ -12,6 +9,7 @@ struct rspamd_classifier_config; struct rspamd_task; +/* Common classifier structure */ struct classifier_ctx { rspamd_mempool_t *pool; GHashTable *results; @@ -19,12 +17,6 @@ struct classifier_ctx { struct rspamd_classifier_config *cfg; }; -struct classify_weight { - const char *name; - long double weight; -}; - -/* Common classifier structure */ struct classifier { char *name; struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool, @@ -37,9 +29,6 @@ struct classifier { GError **err); }; -/* Get classifier structure by name or return NULL if this name is not found */ -struct classifier * rspamd_stat_get_classifier (const char *name); - /* Bayes algorithm */ struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier_config *cf); @@ -53,8 +42,6 @@ gboolean bayes_learn_spam (struct classifier_ctx * ctx, gboolean is_spam, lua_State *L, GError **err); -/* Array of all defined classifiers */ -extern struct classifier classifiers[]; #endif /* diff --git a/src/libstat/stat_api.h b/src/libstat/stat_api.h index 64b3f0b92..0e2bf86b8 100644 --- a/src/libstat/stat_api.h +++ b/src/libstat/stat_api.h @@ -31,35 +31,6 @@ * High level statistics API */ -struct rspamd_statfile_runtime { - struct rspamd_statfile_config *st; - guint64 hits; - guint64 total_hits; -}; - -struct rspamd_classifier_runtime { - double ham_prob; - double spam_prob; - guint64 total_spam; - guint64 total_ham; - guint64 processed_tokens; - gsize max_tokens; -}; - -struct rspamd_token_result { - double value; - struct rspamd_statfile_runtime *st_runtime; - - struct rspamd_classifier_runtime *cl_runtime; -}; - -#define RSPAMD_MAX_TOKEN_LEN 64 -typedef struct token_node_s { - guchar data[RSPAMD_MAX_TOKEN_LEN]; - guint datalen; - GArray *results; -} rspamd_token_t; - /** * Initialise statistics modules * @param cfg diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c index fd2c0f165..a539f3c80 100644 --- a/src/libstat/stat_config.c +++ b/src/libstat/stat_config.c @@ -27,4 +27,34 @@ #include "stat_api.h" #include "main.h" #include "cfg_rcl.h" +#include "stat_internal.h" +#include "backends/mmaped_file.h" +static struct rspamd_stat_ctx *stat_ctx = NULL; + +static struct classifier classifiers[] = { + { + .name = "bayes", + .init_func = bayes_init, + .classify_func = bayes_classify, + .learn_spam_func = bayes_learn_spam, + } +}; + +static struct tokenizer tokenizers[] = { + {"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word}, +}; + +struct rspamd_stat_backend statfile_backends[] = { + { + .name = RSPAMD_DEFAULT_BACKEND, + .init = rspamd_mmaped_file_init, + } +}; + + +void +rspamd_stat_init (struct rspamd_config *cfg) +{ + +} diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h new file mode 100644 index 000000000..4aa625b27 --- /dev/null +++ b/src/libstat/stat_internal.h @@ -0,0 +1,70 @@ +/* Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef STAT_INTERNAL_H_ +#define STAT_INTERNAL_H_ + +#include "config.h" +#include "task.h" +#include "classifiers/classifiers.h" +#include "tokenizers/tokenizers.h" +#include "backends/backends.h" + +struct rspamd_statfile_runtime { + struct rspamd_statfile_config *st; + guint64 hits; + guint64 total_hits; +}; + +struct rspamd_classifier_runtime { + double ham_prob; + double spam_prob; + guint64 total_spam; + guint64 total_ham; + guint64 processed_tokens; + gsize max_tokens; +}; + +struct rspamd_token_result { + double value; + struct rspamd_statfile_runtime *st_runtime; + + struct rspamd_classifier_runtime *cl_runtime; +}; + +#define RSPAMD_MAX_TOKEN_LEN 64 +typedef struct token_node_s { + guchar data[RSPAMD_MAX_TOKEN_LEN]; + guint datalen; + GArray *results; +} rspamd_token_t; + +struct rspamd_stat_ctx { + struct classifier *classifiers; + guint classifiers_count; + struct tokenizer *tokenizers; + guint tokenizers_count; + struct rspamd_stat_backend *backends; + guint backends_count; +}; + +#endif /* STAT_INTERNAL_H_ */ diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index 10e4b92d5..eee41a971 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -29,10 +29,6 @@ #include "main.h" #include "tokenizers.h" -struct tokenizer tokenizers[] = { - {"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word}, -}; - const int primes[] = { 1, 7, 3, 13, @@ -75,20 +71,6 @@ const gchar t_delimiters[255] = { 0, 0, 0, 0, 0 }; -struct tokenizer * -rspamd_stat_get_tokenizer (const char *name) -{ - guint i; - - for (i = 0; i < sizeof (tokenizers) / sizeof (tokenizers[0]); i++) { - if (strcmp (tokenizers[i].name, name) == 0) { - return &tokenizers[i]; - } - } - - return NULL; -} - int token_node_compare_func (gconstpointer a, gconstpointer b) { diff --git a/src/libstat/tokenizers.h b/src/libstat/tokenizers/tokenizers.h similarity index 84% rename from src/libstat/tokenizers.h rename to src/libstat/tokenizers/tokenizers.h index 73d07a5c4..8ee11cea1 100644 --- a/src/libstat/tokenizers.h +++ b/src/libstat/tokenizers/tokenizers.h @@ -23,9 +23,6 @@ struct tokenizer { /* Compare two token nodes */ int token_node_compare_func (gconstpointer a, gconstpointer b); -/* Get tokenizer structure by name or return NULL if this name is not found */ -struct tokenizer * rspamd_stat_get_tokenizer (const char *name); - /* Get next word from specified f_str_t buf */ gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf, rspamd_fstring_t *token, GList **exceptions); @@ -46,9 +43,6 @@ int osb_tokenize_text (struct tokenizer *tokenizer, /* Make tokens for a subject */ void tokenize_subject (struct rspamd_task *task, GTree ** tree); -/* Array of all defined tokenizers */ -extern struct tokenizer tokenizers[]; - #endif /* * vi:ts=4 -- 2.39.5