@@ -1,14 +1,11 @@ | |||
# Librspamdserver | |||
SET(LIBSTATSRC | |||
stat_config.c) | |||
SET(LIBSTATSRC stat_config.c) | |||
SET(TOKENIZERSSRC tokenizers/tokenizers.c | |||
tokenizers/osb.c) | |||
SET(CLASSIFIERSSRC classifiers/classifiers.c | |||
classifiers/bayes.c) | |||
SET(CLASSIFIERSSRC classifiers/bayes.c) | |||
SET(BACKENDSSRC backends/backends.c | |||
backends/mmaped_file.c) | |||
SET(BACKENDSSRC backends/mmaped_file.c) | |||
ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC} | |||
${TOKENIZERSSRC} |
@@ -1,50 +0,0 @@ | |||
/* | |||
* Copyright (c) 2015, Vsevolod Stakhov | |||
* | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without | |||
* modification, are permitted provided that the following conditions are met: | |||
* * Redistributions of source code must retain the above copyright | |||
* notice, this list of conditions and the following disclaimer. | |||
* * Redistributions in binary form must reproduce the above copyright | |||
* notice, this list of conditions and the following disclaimer in the | |||
* documentation and/or other materials provided with the distribution. | |||
* | |||
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY | |||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY | |||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*/ | |||
#include "main.h" | |||
#include "backends.h" | |||
#include "mmaped_file.h" | |||
struct rspamd_stat_backend statfile_backends[] = { | |||
{ | |||
.name = RSPAMD_DEFAULT_BACKEND, | |||
.init = rspamd_mmaped_file_init, | |||
} | |||
}; | |||
struct rspamd_stat_backend * | |||
rspamd_stat_get_backend (const char *name) | |||
{ | |||
guint i; | |||
for (i = 0; i < G_N_ELEMENTS (statfile_backends); i++) { | |||
if (strcmp (statfile_backends[i].name, name) == 0) { | |||
return &statfile_backends[i]; | |||
} | |||
} | |||
return NULL; | |||
} |
@@ -26,18 +26,20 @@ | |||
#define BACKENDS_H_ | |||
#include "config.h" | |||
#include "cfg_file.h" | |||
#define RSPAMD_DEFAULT_BACKEND "mmap" | |||
/* Forward declarations */
struct rspamd_classifier_config; | |||
struct rspamd_statfile_config; | |||
struct rspamd_config; | |||
struct rspamd_stat_backend { | |||
const char *name; | |||
gpointer (*init)(struct rspamd_statfile_config *cfg); | |||
gpointer ctx; | |||
}; | |||
extern struct rspamd_stat_backend statfile_backends[]; | |||
struct rspamd_stat_backend *rspamd_stat_get_backend (const char *name); | |||
gpointer rspamd_mmaped_file_init(struct rspamd_config *cfg); | |||
#endif /* BACKENDS_H_ */ |
@@ -24,7 +24,6 @@ | |||
#include "config.h" | |||
#include "mmaped_file.h" | |||
#include "main.h" | |||
#define CHAIN_LENGTH 128 |
@@ -1,20 +0,0 @@ | |||
/** | |||
* @file statfile.h | |||
* Describes common methods for accessing statistics files and caching them in memory | |||
*/ | |||
#ifndef RSPAMD_STATFILE_H | |||
#define RSPAMD_STATFILE_H | |||
#include "config.h" | |||
/* Forward declarations */
struct rspamd_classifier_config; | |||
struct rspamd_statfile_config; | |||
struct rspamd_config; | |||
gpointer | |||
rspamd_mmaped_file_init(struct rspamd_config *cfg); | |||
#endif |
@@ -1,56 +0,0 @@ | |||
/* | |||
* Copyright (c) 2009-2012, Vsevolod Stakhov | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without | |||
* modification, are permitted provided that the following conditions are met: | |||
* * Redistributions of source code must retain the above copyright | |||
* notice, this list of conditions and the following disclaimer. | |||
* * Redistributions in binary form must reproduce the above copyright | |||
* notice, this list of conditions and the following disclaimer in the | |||
* documentation and/or other materials provided with the distribution. | |||
* | |||
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY | |||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY | |||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*/ | |||
/* | |||
* Common classifier functions | |||
*/ | |||
#include "classifiers.h" | |||
struct classifier classifiers[] = { | |||
{ | |||
.name = "bayes", | |||
.init_func = bayes_init, | |||
.classify_func = bayes_classify, | |||
.learn_spam_func = bayes_learn_spam, | |||
} | |||
}; | |||
struct classifier * | |||
rspamd_stat_get_classifier (const char *name) | |||
{ | |||
guint i; | |||
for (i = 0; i < sizeof (classifiers) / sizeof (classifiers[0]); i++) { | |||
if (strcmp (classifiers[i].name, name) == 0) { | |||
return &classifiers[i]; | |||
} | |||
} | |||
return NULL; | |||
} | |||
/* | |||
* vi:ts=4 | |||
*/ |
@@ -2,9 +2,6 @@ | |||
#define CLASSIFIERS_H | |||
#include "config.h" | |||
#include "mem_pool.h" | |||
#include "tokenizers.h" | |||
#include <lua.h> | |||
/* Consider this value as 0 */ | |||
#define ALPHA 0.0001 | |||
@@ -12,6 +9,7 @@ | |||
struct rspamd_classifier_config; | |||
struct rspamd_task; | |||
/* Common classifier structure */ | |||
struct classifier_ctx { | |||
rspamd_mempool_t *pool; | |||
GHashTable *results; | |||
@@ -19,12 +17,6 @@ struct classifier_ctx { | |||
struct rspamd_classifier_config *cfg; | |||
}; | |||
/* Pairs a name with a weight value */
struct classify_weight {
	const char *name;	/* label attached to the weight */
	long double weight;	/* weight value */
};
/* Common classifier structure */ | |||
struct classifier { | |||
char *name; | |||
struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool, | |||
@@ -37,9 +29,6 @@ struct classifier { | |||
GError **err); | |||
}; | |||
/* Get classifier structure by name or return NULL if this name is not found */ | |||
struct classifier * rspamd_stat_get_classifier (const char *name); | |||
/* Bayes algorithm */ | |||
struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, | |||
struct rspamd_classifier_config *cf); | |||
@@ -53,8 +42,6 @@ gboolean bayes_learn_spam (struct classifier_ctx * ctx, | |||
gboolean is_spam, | |||
lua_State *L, | |||
GError **err); | |||
/* Array of all defined classifiers */ | |||
extern struct classifier classifiers[]; | |||
#endif | |||
/* |
@@ -31,35 +31,6 @@ | |||
* High level statistics API | |||
*/ | |||
struct rspamd_statfile_runtime { | |||
struct rspamd_statfile_config *st; | |||
guint64 hits; | |||
guint64 total_hits; | |||
}; | |||
struct rspamd_classifier_runtime { | |||
double ham_prob; | |||
double spam_prob; | |||
guint64 total_spam; | |||
guint64 total_ham; | |||
guint64 processed_tokens; | |||
gsize max_tokens; | |||
}; | |||
/*
 * A value recorded for a token, together with the statfile and classifier
 * runtime records it is associated with.
 */
struct rspamd_token_result {
	double value;					/* recorded value */
	struct rspamd_statfile_runtime *st_runtime;	/* statfile runtime record */
	struct rspamd_classifier_runtime *cl_runtime;	/* classifier runtime record */
};
#define RSPAMD_MAX_TOKEN_LEN 64 | |||
typedef struct token_node_s { | |||
guchar data[RSPAMD_MAX_TOKEN_LEN]; | |||
guint datalen; | |||
GArray *results; | |||
} rspamd_token_t; | |||
/** | |||
* Initialise statistics modules | |||
* @param cfg |
@@ -27,4 +27,34 @@ | |||
#include "stat_api.h" | |||
#include "main.h" | |||
#include "cfg_rcl.h" | |||
#include "stat_internal.h" | |||
#include "backends/mmaped_file.h" | |||
/* File-local statistics context; starts out as NULL */
static struct rspamd_stat_ctx *stat_ctx = NULL;
static struct classifier classifiers[] = { | |||
{ | |||
.name = "bayes", | |||
.init_func = bayes_init, | |||
.classify_func = bayes_classify, | |||
.learn_spam_func = bayes_learn_spam, | |||
} | |||
}; | |||
static struct tokenizer tokenizers[] = { | |||
{"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word}, | |||
}; | |||
struct rspamd_stat_backend statfile_backends[] = { | |||
{ | |||
.name = RSPAMD_DEFAULT_BACKEND, | |||
.init = rspamd_mmaped_file_init, | |||
} | |||
}; | |||
/*
 * Initialise statistics modules.
 *
 * @param cfg configuration object; not read by the current body
 *
 * The body is empty: nothing is initialised here and stat_ctx is left
 * untouched.
 */
void
rspamd_stat_init (struct rspamd_config *cfg)
{
	/* empty body */
}
@@ -0,0 +1,70 @@ | |||
/* Copyright (c) 2015, Vsevolod Stakhov | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without | |||
* modification, are permitted provided that the following conditions are met: | |||
* * Redistributions of source code must retain the above copyright | |||
* notice, this list of conditions and the following disclaimer. | |||
* * Redistributions in binary form must reproduce the above copyright | |||
* notice, this list of conditions and the following disclaimer in the | |||
* documentation and/or other materials provided with the distribution. | |||
* | |||
* THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY | |||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY | |||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*/ | |||
#ifndef STAT_INTERNAL_H_ | |||
#define STAT_INTERNAL_H_ | |||
#include "config.h" | |||
#include "task.h" | |||
#include "classifiers/classifiers.h" | |||
#include "tokenizers/tokenizers.h" | |||
#include "backends/backends.h" | |||
struct rspamd_statfile_runtime { | |||
struct rspamd_statfile_config *st; | |||
guint64 hits; | |||
guint64 total_hits; | |||
}; | |||
struct rspamd_classifier_runtime { | |||
double ham_prob; | |||
double spam_prob; | |||
guint64 total_spam; | |||
guint64 total_ham; | |||
guint64 processed_tokens; | |||
gsize max_tokens; | |||
}; | |||
/*
 * A value recorded for a token, together with the statfile and classifier
 * runtime records it is associated with.
 */
struct rspamd_token_result {
	double value;					/* recorded value */
	struct rspamd_statfile_runtime *st_runtime;	/* statfile runtime record */
	struct rspamd_classifier_runtime *cl_runtime;	/* classifier runtime record */
};
#define RSPAMD_MAX_TOKEN_LEN 64 | |||
typedef struct token_node_s { | |||
guchar data[RSPAMD_MAX_TOKEN_LEN]; | |||
guint datalen; | |||
GArray *results; | |||
} rspamd_token_t; | |||
struct rspamd_stat_ctx { | |||
struct classifier *classifiers; | |||
guint classifiers_count; | |||
struct tokenizer *tokenizers; | |||
guint tokenizers_count; | |||
struct rspamd_stat_backend *backends; | |||
guint backends_count; | |||
}; | |||
#endif /* STAT_INTERNAL_H_ */ |
@@ -29,10 +29,6 @@ | |||
#include "main.h" | |||
#include "tokenizers.h" | |||
struct tokenizer tokenizers[] = { | |||
{"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word}, | |||
}; | |||
const int primes[] = { | |||
1, 7, | |||
3, 13, | |||
@@ -75,20 +71,6 @@ const gchar t_delimiters[255] = { | |||
0, 0, 0, 0, 0 | |||
}; | |||
struct tokenizer * | |||
rspamd_stat_get_tokenizer (const char *name) | |||
{ | |||
guint i; | |||
for (i = 0; i < sizeof (tokenizers) / sizeof (tokenizers[0]); i++) { | |||
if (strcmp (tokenizers[i].name, name) == 0) { | |||
return &tokenizers[i]; | |||
} | |||
} | |||
return NULL; | |||
} | |||
int | |||
token_node_compare_func (gconstpointer a, gconstpointer b) | |||
{ |
@@ -23,9 +23,6 @@ struct tokenizer { | |||
/* Compare two token nodes */ | |||
int token_node_compare_func (gconstpointer a, gconstpointer b); | |||
/* Get tokenizer structure by name or return NULL if this name is not found */ | |||
struct tokenizer * rspamd_stat_get_tokenizer (const char *name); | |||
/* Get the next word from the specified rspamd_fstring_t buffer */
gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf, | |||
rspamd_fstring_t *token, GList **exceptions); | |||
@@ -46,9 +43,6 @@ int osb_tokenize_text (struct tokenizer *tokenizer, | |||
/* Make tokens for a subject */ | |||
void tokenize_subject (struct rspamd_task *task, GTree ** tree); | |||
/* Array of all defined tokenizers */ | |||
extern struct tokenizer tokenizers[]; | |||
#endif | |||
/* | |||
* vi:ts=4 |