# Librspamdserver
-SET(LIBSTATSRC
- stat_config.c)
+SET(LIBSTATSRC stat_config.c)
SET(TOKENIZERSSRC tokenizers/tokenizers.c
tokenizers/osb.c)
-SET(CLASSIFIERSSRC classifiers/classifiers.c
- classifiers/bayes.c)
+SET(CLASSIFIERSSRC classifiers/bayes.c)
-SET(BACKENDSSRC backends/backends.c
- backends/mmaped_file.c)
+SET(BACKENDSSRC backends/mmaped_file.c)
ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC}
${TOKENIZERSSRC}
+++ /dev/null
-/*
- * Copyright (c) 2015, Vsevolod Stakhov
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef BACKENDS_H_
-#define BACKENDS_H_
-
-#include "config.h"
-#include "cfg_file.h"
-
-#define RSPAMD_DEFAULT_BACKEND "mmap"
-
-struct rspamd_stat_backend {
- const char *name;
- gpointer (*init)(struct rspamd_statfile_config *cfg);
- gpointer ctx;
-};
-
-extern struct rspamd_stat_backend statfile_backends[];
-
-struct rspamd_stat_backend *rspamd_stat_get_backend (const char *name);
-
-#endif /* BACKENDS_H_ */
+++ /dev/null
-/*
- * Copyright (c) 2015, Vsevolod Stakhov
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "main.h"
-#include "backends.h"
-#include "mmaped_file.h"
-
-struct rspamd_stat_backend statfile_backends[] = {
- {
- .name = RSPAMD_DEFAULT_BACKEND,
- .init = rspamd_mmaped_file_init,
- }
-};
-
-
-struct rspamd_stat_backend *
-rspamd_stat_get_backend (const char *name)
-{
- guint i;
-
- for (i = 0; i < G_N_ELEMENTS (statfile_backends); i++) {
- if (strcmp (statfile_backends[i].name, name) == 0) {
- return &statfile_backends[i];
- }
- }
-
- return NULL;
-}
--- /dev/null
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef BACKENDS_H_
+#define BACKENDS_H_
+
+#include "config.h"
+
+#define RSPAMD_DEFAULT_BACKEND "mmap"
+
+/* Forward declarations */
+struct rspamd_classifier_config;
+struct rspamd_statfile_config;
+struct rspamd_config;
+
+/*
+ * Descriptor of a statistics backend: a name, an init callback and an opaque
+ * context pointer.
+ * NOTE(review): `init` is declared to take `struct rspamd_statfile_config *`,
+ * while rspamd_mmaped_file_init (declared in this header and assigned to
+ * `.init` of the default backend entry) takes `struct rspamd_config *`.
+ * The two function pointer types are not compatible - confirm which
+ * signature is intended.
+ */
+struct rspamd_stat_backend {
+ const char *name;
+ gpointer (*init)(struct rspamd_statfile_config *cfg);
+ gpointer ctx;
+};
+
+/*
+ * Init routine used as the `.init` callback of the RSPAMD_DEFAULT_BACKEND
+ * ("mmap") backend entry.
+ * NOTE(review): takes `struct rspamd_config *`, whereas the `init` member of
+ * struct rspamd_stat_backend expects `struct rspamd_statfile_config *` -
+ * verify which parameter type is correct.
+ */
+gpointer rspamd_mmaped_file_init(struct rspamd_config *cfg);
+
+#endif /* BACKENDS_H_ */
#include "config.h"
-#include "mmaped_file.h"
#include "main.h"
#define CHAIN_LENGTH 128
+++ /dev/null
-/**
- * @file statfile.h
- * Describes common methods for accessing statistics files and caching them in memory
- */
-
-#ifndef RSPAMD_STATFILE_H
-#define RSPAMD_STATFILE_H
-
-#include "config.h"
-
-
-/* Forwarded declarations */
-struct rspamd_classifier_config;
-struct rspamd_statfile_config;
-struct rspamd_config;
-
-gpointer
-rspamd_mmaped_file_init(struct rspamd_config *cfg);
-
-#endif
+++ /dev/null
-#ifndef CLASSIFIERS_H
-#define CLASSIFIERS_H
-
-#include "config.h"
-#include "mem_pool.h"
-#include "tokenizers.h"
-#include <lua.h>
-
-/* Consider this value as 0 */
-#define ALPHA 0.0001
-
-struct rspamd_classifier_config;
-struct rspamd_task;
-
-struct classifier_ctx {
- rspamd_mempool_t *pool;
- GHashTable *results;
- gboolean debug;
- struct rspamd_classifier_config *cfg;
-};
-
-struct classify_weight {
- const char *name;
- long double weight;
-};
-
-/* Common classifier structure */
-struct classifier {
- char *name;
- struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool,
- struct rspamd_classifier_config *cf);
- gboolean (*classify_func)(struct classifier_ctx * ctx,
- GTree *input, struct rspamd_task *task,
- lua_State *L);
- gboolean (*learn_spam_func)(struct classifier_ctx * ctx,
- GTree *input, struct rspamd_task *task, gboolean is_spam, lua_State *L,
- GError **err);
-};
-
-/* Get classifier structure by name or return NULL if this name is not found */
-struct classifier * rspamd_stat_get_classifier (const char *name);
-
-/* Bayes algorithm */
-struct classifier_ctx * bayes_init (rspamd_mempool_t *pool,
- struct rspamd_classifier_config *cf);
-gboolean bayes_classify (struct classifier_ctx * ctx,
- GTree *input,
- struct rspamd_task *task,
- lua_State *L);
-gboolean bayes_learn_spam (struct classifier_ctx * ctx,
- GTree *input,
- struct rspamd_task *task,
- gboolean is_spam,
- lua_State *L,
- GError **err);
-/* Array of all defined classifiers */
-extern struct classifier classifiers[];
-
-#endif
-/*
- * vi:ts=4
- */
+++ /dev/null
-/*
- * Copyright (c) 2009-2012, Vsevolod Stakhov
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Common classifier functions
- */
-
-#include "classifiers.h"
-
-struct classifier classifiers[] = {
- {
- .name = "bayes",
- .init_func = bayes_init,
- .classify_func = bayes_classify,
- .learn_spam_func = bayes_learn_spam,
- }
-};
-
-struct classifier *
-rspamd_stat_get_classifier (const char *name)
-{
- guint i;
-
- for (i = 0; i < sizeof (classifiers) / sizeof (classifiers[0]); i++) {
- if (strcmp (classifiers[i].name, name) == 0) {
- return &classifiers[i];
- }
- }
-
- return NULL;
-}
-
-/*
- * vi:ts=4
- */
--- /dev/null
+#ifndef CLASSIFIERS_H
+#define CLASSIFIERS_H
+
+#include "config.h"
+
+/* Consider this value as 0 */
+#define ALPHA 0.0001
+
+struct rspamd_classifier_config;
+struct rspamd_task;
+
+/*
+ * Classifier context: a pointer to it is returned by init_func and passed as
+ * the first argument to the classify/learn callbacks of struct classifier.
+ */
+struct classifier_ctx {
+ rspamd_mempool_t *pool;
+ GHashTable *results;
+ gboolean debug;
+ /* presumably the `cf` that was handed to init_func - confirm against bayes_init */
+ struct rspamd_classifier_config *cfg;
+};
+
+/*
+ * Common classifier structure: a name plus init, classify and learn callbacks.
+ * NOTE(review): the callbacks use rspamd_mempool_t and lua_State, but this
+ * header includes only "config.h"; it appears to rely on the includer having
+ * pulled in mem_pool.h and lua.h beforehand - confirm.
+ */
+struct classifier {
+ char *name;
+ struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool,
+ struct rspamd_classifier_config *cf);
+ gboolean (*classify_func)(struct classifier_ctx * ctx,
+ GTree *input, struct rspamd_task *task,
+ lua_State *L);
+ gboolean (*learn_spam_func)(struct classifier_ctx * ctx,
+ GTree *input, struct rspamd_task *task, gboolean is_spam, lua_State *L,
+ GError **err);
+};
+
+/*
+ * Bayes algorithm.
+ * The three entry points below have the same signatures as the init_func,
+ * classify_func and learn_spam_func members of struct classifier.
+ */
+struct classifier_ctx * bayes_init (rspamd_mempool_t *pool,
+ struct rspamd_classifier_config *cf);
+gboolean bayes_classify (struct classifier_ctx * ctx,
+ GTree *input,
+ struct rspamd_task *task,
+ lua_State *L);
+gboolean bayes_learn_spam (struct classifier_ctx * ctx,
+ GTree *input,
+ struct rspamd_task *task,
+ gboolean is_spam,
+ lua_State *L,
+ GError **err);
+
+#endif
+/*
+ * vi:ts=4
+ */
* High level statistics API
*/
-struct rspamd_statfile_runtime {
- struct rspamd_statfile_config *st;
- guint64 hits;
- guint64 total_hits;
-};
-
-struct rspamd_classifier_runtime {
- double ham_prob;
- double spam_prob;
- guint64 total_spam;
- guint64 total_ham;
- guint64 processed_tokens;
- gsize max_tokens;
-};
-
-struct rspamd_token_result {
- double value;
- struct rspamd_statfile_runtime *st_runtime;
-
- struct rspamd_classifier_runtime *cl_runtime;
-};
-
-#define RSPAMD_MAX_TOKEN_LEN 64
-typedef struct token_node_s {
- guchar data[RSPAMD_MAX_TOKEN_LEN];
- guint datalen;
- GArray *results;
-} rspamd_token_t;
-
/**
* Initialise statistics modules
* @param cfg
#include "stat_api.h"
#include "main.h"
#include "cfg_rcl.h"
+#include "stat_internal.h"
+/*
+ * backends.h declares struct rspamd_stat_backend, RSPAMD_DEFAULT_BACKEND and
+ * rspamd_mmaped_file_init, all of which are used directly in this file.
+ */
+#include "backends/backends.h"
+
+/* Module-wide statistics context; starts out as NULL */
+static struct rspamd_stat_ctx *stat_ctx = NULL;
+
+/* Table of compiled-in classifiers; the only entry is "bayes" */
+static struct classifier classifiers[] = {
+ {
+ .name = "bayes",
+ .init_func = bayes_init,
+ .classify_func = bayes_classify,
+ .learn_spam_func = bayes_learn_spam,
+ }
+};
+
+/*
+ * Table of compiled-in tokenizers. Entries use positional initialisers, so
+ * they follow the member order of struct tokenizer:
+ * name, tokenize_func, get_next_word.
+ */
+static struct tokenizer tokenizers[] = {
+ {"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word},
+};
+
+/*
+ * Table of compiled-in statfile backends; the only entry is the default
+ * ("mmap") backend. The `ctx` member is left zero-initialised.
+ * NOTE(review): unlike the classifiers/tokenizers tables this array is not
+ * `static` - confirm whether external linkage is still required.
+ * NOTE(review): rspamd_mmaped_file_init is declared with a
+ * `struct rspamd_config *` parameter, but `.init` expects a function taking
+ * `struct rspamd_statfile_config *` - verify the intended signature.
+ */
+struct rspamd_stat_backend statfile_backends[] = {
+ {
+ .name = RSPAMD_DEFAULT_BACKEND,
+ .init = rspamd_mmaped_file_init,
+ }
+};
+
+
+/*
+ * Initialise the statistics subsystem.
+ * Currently an empty stub: `cfg` is ignored and neither stat_ctx nor the
+ * classifier/tokenizer/backend tables defined in this file are touched.
+ */
+void
+rspamd_stat_init (struct rspamd_config *cfg)
+{
+
+}
--- /dev/null
+/* Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef STAT_INTERNAL_H_
+#define STAT_INTERNAL_H_
+
+#include "config.h"
+#include "task.h"
+#include "classifiers/classifiers.h"
+#include "tokenizers/tokenizers.h"
+#include "backends/backends.h"
+
+/*
+ * Runtime state kept alongside one statfile configuration (`st`).
+ * NOTE(review): `hits` / `total_hits` look like per-statfile counters -
+ * confirm their exact meaning against the code that updates them.
+ */
+struct rspamd_statfile_runtime {
+ struct rspamd_statfile_config *st;
+ guint64 hits;
+ guint64 total_hits;
+};
+
+/*
+ * Runtime state of a classifier: two probability values, three 64-bit
+ * counters and a token limit.
+ * NOTE(review): field semantics are inferred from the names only - confirm
+ * against the classifier implementation.
+ */
+struct rspamd_classifier_runtime {
+ double ham_prob;
+ double spam_prob;
+ guint64 total_spam;
+ guint64 total_ham;
+ guint64 processed_tokens;
+ gsize max_tokens;
+};
+
+/*
+ * A single value stored for a token, together with pointers to the statfile
+ * runtime and the classifier runtime it is associated with.
+ */
+struct rspamd_token_result {
+ double value;
+ struct rspamd_statfile_runtime *st_runtime;
+
+ struct rspamd_classifier_runtime *cl_runtime;
+};
+
+/* Size in bytes of the fixed data buffer inside rspamd_token_t */
+#define RSPAMD_MAX_TOKEN_LEN 64
+/*
+ * A token: a fixed-size byte buffer, the length recorded for it and an array
+ * of results.
+ */
+typedef struct token_node_s {
+ guchar data[RSPAMD_MAX_TOKEN_LEN];
+ guint datalen;
+ /* presumably an array of struct rspamd_token_result - confirm */
+ GArray *results;
+} rspamd_token_t;
+
+/*
+ * Statistics context: three (array pointer, element count) pairs for
+ * classifiers, tokenizers and backends.
+ */
+struct rspamd_stat_ctx {
+ struct classifier *classifiers;
+ guint classifiers_count;
+ struct tokenizer *tokenizers;
+ guint tokenizers_count;
+ struct rspamd_stat_backend *backends;
+ guint backends_count;
+};
+
+#endif /* STAT_INTERNAL_H_ */
+++ /dev/null
-#ifndef TOKENIZERS_H
-#define TOKENIZERS_H
-
-#include "config.h"
-#include "mem_pool.h"
-#include "fstring.h"
-#include "main.h"
-#include "stat_api.h"
-
-/* Common tokenizer structure */
-struct tokenizer {
- gchar *name;
- gint (*tokenize_func)(struct tokenizer *tokenizer,
- rspamd_mempool_t *pool,
- GArray *words,
- GTree **cur,
- gboolean save_token,
- gboolean is_utf,
- GList *exceptions);
- gchar * (*get_next_word)(rspamd_fstring_t *buf, rspamd_fstring_t *token, GList **exceptions);
-};
-
-/* Compare two token nodes */
-int token_node_compare_func (gconstpointer a, gconstpointer b);
-
-/* Get tokenizer structure by name or return NULL if this name is not found */
-struct tokenizer * rspamd_stat_get_tokenizer (const char *name);
-
-/* Get next word from specified f_str_t buf */
-gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf,
- rspamd_fstring_t *token, GList **exceptions);
-
-/* Tokenize text into array of words (rspamd_fstring_t type) */
-GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf,
- gsize min_len, GList **exceptions);
-
-/* OSB tokenize function */
-int osb_tokenize_text (struct tokenizer *tokenizer,
- rspamd_mempool_t *pool,
- GArray *input,
- GTree **cur,
- gboolean save_token,
- gboolean is_utf,
- GList *exceptions);
-
-/* Make tokens for a subject */
-void tokenize_subject (struct rspamd_task *task, GTree ** tree);
-
-/* Array of all defined tokenizers */
-extern struct tokenizer tokenizers[];
-
-#endif
-/*
- * vi:ts=4
- */
#include "main.h"
#include "tokenizers.h"
-struct tokenizer tokenizers[] = {
- {"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word},
-};
-
const int primes[] = {
1, 7,
3, 13,
0, 0, 0, 0, 0
};
-struct tokenizer *
-rspamd_stat_get_tokenizer (const char *name)
-{
- guint i;
-
- for (i = 0; i < sizeof (tokenizers) / sizeof (tokenizers[0]); i++) {
- if (strcmp (tokenizers[i].name, name) == 0) {
- return &tokenizers[i];
- }
- }
-
- return NULL;
-}
-
int
token_node_compare_func (gconstpointer a, gconstpointer b)
{
--- /dev/null
+#ifndef TOKENIZERS_H
+#define TOKENIZERS_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "fstring.h"
+#include "main.h"
+#include "stat_api.h"
+
+/*
+ * Common tokenizer structure.
+ * Member order matters: the tokenizer table is filled with positional
+ * initialisers in the order name, tokenize_func, get_next_word.
+ */
+struct tokenizer {
+ gchar *name;
+ /* Tokenize callback; osb_tokenize_text has this signature */
+ gint (*tokenize_func)(struct tokenizer *tokenizer,
+ rspamd_mempool_t *pool,
+ GArray *words,
+ GTree **cur,
+ gboolean save_token,
+ gboolean is_utf,
+ GList *exceptions);
+ /* Word extraction callback; rspamd_tokenizer_get_word has this signature */
+ gchar * (*get_next_word)(rspamd_fstring_t *buf, rspamd_fstring_t *token, GList **exceptions);
+};
+
+/* Compare two token nodes */
+int token_node_compare_func (gconstpointer a, gconstpointer b);
+
+/* Get next word from the specified rspamd_fstring_t buf */
+gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf,
+ rspamd_fstring_t *token, GList **exceptions);
+
+/* Tokenize text into array of words (rspamd_fstring_t type) */
+GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf,
+ gsize min_len, GList **exceptions);
+
+/* OSB tokenize function */
+int osb_tokenize_text (struct tokenizer *tokenizer,
+ rspamd_mempool_t *pool,
+ GArray *input,
+ GTree **cur,
+ gboolean save_token,
+ gboolean is_utf,
+ GList *exceptions);
+
+/* Make tokens for a subject */
+void tokenize_subject (struct rspamd_task *task, GTree ** tree);
+
+#endif
+/*
+ * vi:ts=4
+ */