# Librspamdserver | # Librspamdserver | ||||
# Source-file lists grouped by component (stat core, tokenizers, classifiers,
# backends). LIBSTATSRC and TOKENIZERSSRC are passed to the rspamd-stat
# target below; the rest of that call is outside this view.
# NOTE(review): LIBSTATSRC, CLASSIFIERSSRC and BACKENDSSRC are each SET twice
# in this span; in CMake the later SET replaces the earlier value - confirm
# which list is the intended one.
SET(LIBSTATSRC | |||||
stat_config.c) | |||||
SET(LIBSTATSRC stat_config.c) | |||||
SET(TOKENIZERSSRC tokenizers/tokenizers.c | SET(TOKENIZERSSRC tokenizers/tokenizers.c | ||||
tokenizers/osb.c) | tokenizers/osb.c) | ||||
SET(CLASSIFIERSSRC classifiers/classifiers.c | |||||
classifiers/bayes.c) | |||||
SET(CLASSIFIERSSRC classifiers/bayes.c) | |||||
SET(BACKENDSSRC backends/backends.c | |||||
backends/mmaped_file.c) | |||||
SET(BACKENDSSRC backends/mmaped_file.c) | |||||
ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC} | ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC} | ||||
${TOKENIZERSSRC} | ${TOKENIZERSSRC} |
/* | |||||
* Copyright (c) 2015, Vsevolod Stakhov | |||||
* | |||||
* All rights reserved. | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions are met: | |||||
* * Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* * Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY | |||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||||
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY | |||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*/ | |||||
#include "main.h" | |||||
#include "backends.h" | |||||
#include "mmaped_file.h" | |||||
/*
 * Table of statistics backends that rspamd_stat_get_backend () searches by
 * name. It holds a single entry named RSPAMD_DEFAULT_BACKEND whose init
 * callback is rspamd_mmaped_file_init; the ctx member is not set here.
 * NOTE(review): the init member is declared as taking
 * `struct rspamd_statfile_config *`, while rspamd_mmaped_file_init is
 * prototyped with `struct rspamd_config *` - the function pointer types look
 * incompatible; confirm which signature is intended.
 */
struct rspamd_stat_backend statfile_backends[] = { | |||||
{ | |||||
.name = RSPAMD_DEFAULT_BACKEND, | |||||
.init = rspamd_mmaped_file_init, | |||||
} | |||||
}; | |||||
struct rspamd_stat_backend * | |||||
rspamd_stat_get_backend (const char *name) | |||||
{ | |||||
guint i; | |||||
for (i = 0; i < G_N_ELEMENTS (statfile_backends); i++) { | |||||
if (strcmp (statfile_backends[i].name, name) == 0) { | |||||
return &statfile_backends[i]; | |||||
} | |||||
} | |||||
return NULL; | |||||
} |
#define BACKENDS_H_ | #define BACKENDS_H_ | ||||
#include "config.h" | #include "config.h" | ||||
#include "cfg_file.h" | |||||
#define RSPAMD_DEFAULT_BACKEND "mmap" | #define RSPAMD_DEFAULT_BACKEND "mmap" | ||||
/* Forward declarations */ | |||||
struct rspamd_classifier_config; | |||||
struct rspamd_statfile_config; | |||||
struct rspamd_config; | |||||
/*
 * Descriptor of one statistics backend: the name it is looked up by
 * (compared with strcmp), an init callback that receives a statfile
 * configuration and returns an opaque pointer, and an opaque ctx pointer.
 * NOTE(review): ctx presumably stores the value returned by init - confirm
 * against the code that calls init.
 */
struct rspamd_stat_backend { | struct rspamd_stat_backend { | ||||
const char *name; | const char *name; | ||||
gpointer (*init)(struct rspamd_statfile_config *cfg); | gpointer (*init)(struct rspamd_statfile_config *cfg); | ||||
gpointer ctx; | gpointer ctx; | ||||
}; | }; | ||||
/* Array of all defined backends */
extern struct rspamd_stat_backend statfile_backends[]; | |||||
/* Get backend structure by name or return NULL if this name is not found */
struct rspamd_stat_backend *rspamd_stat_get_backend (const char *name); | |||||
/*
 * Init entry point used for the RSPAMD_DEFAULT_BACKEND table entry.
 * NOTE(review): this prototype takes `struct rspamd_config *`, whereas the
 * init member of struct rspamd_stat_backend takes
 * `struct rspamd_statfile_config *`; the two look incompatible - confirm.
 */
gpointer rspamd_mmaped_file_init(struct rspamd_config *cfg); | |||||
#endif /* BACKENDS_H_ */ | #endif /* BACKENDS_H_ */ |
#include "config.h" | #include "config.h" | ||||
#include "mmaped_file.h" | |||||
#include "main.h" | #include "main.h" | ||||
#define CHAIN_LENGTH 128 | #define CHAIN_LENGTH 128 |
/** | |||||
* @file statfile.h | |||||
* Describes common methods for accessing statistics files and caching them in memory | |||||
*/ | |||||
#ifndef RSPAMD_STATFILE_H | |||||
#define RSPAMD_STATFILE_H | |||||
#include "config.h" | |||||
/* Forward declarations */ | |||||
struct rspamd_classifier_config; | |||||
struct rspamd_statfile_config; | |||||
struct rspamd_config; | |||||
/**
 * Initialisation entry point of the mmapped-file statistics backend.
 * @param cfg rspamd configuration
 * @return opaque pointer; what it refers to is not visible from this header
 *
 * NOTE(review): the file-level comment names this header `statfile.h`, yet
 * the only symbol declared is rspamd_mmaped_file_init - confirm that the
 * @file tag and include guard match the header's real name.
 */
gpointer | |||||
rspamd_mmaped_file_init(struct rspamd_config *cfg); | |||||
#endif |
/* | |||||
* Copyright (c) 2009-2012, Vsevolod Stakhov | |||||
* All rights reserved. | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions are met: | |||||
* * Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* * Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY | |||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||||
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY | |||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*/ | |||||
/* | |||||
* Common classifier functions | |||||
*/ | |||||
#include "classifiers.h" | |||||
/*
 * Table of classifiers that rspamd_stat_get_classifier () searches by name.
 * It has one entry, "bayes", wired to the bayes_init, bayes_classify and
 * bayes_learn_spam callbacks.
 */
struct classifier classifiers[] = { | |||||
{ | |||||
.name = "bayes", | |||||
.init_func = bayes_init, | |||||
.classify_func = bayes_classify, | |||||
.learn_spam_func = bayes_learn_spam, | |||||
} | |||||
}; | |||||
struct classifier * | |||||
rspamd_stat_get_classifier (const char *name) | |||||
{ | |||||
guint i; | |||||
for (i = 0; i < sizeof (classifiers) / sizeof (classifiers[0]); i++) { | |||||
if (strcmp (classifiers[i].name, name) == 0) { | |||||
return &classifiers[i]; | |||||
} | |||||
} | |||||
return NULL; | |||||
} | |||||
/* | |||||
* vi:ts=4 | |||||
*/ |
#define CLASSIFIERS_H | #define CLASSIFIERS_H | ||||
#include "config.h" | #include "config.h" | ||||
#include "mem_pool.h" | |||||
#include "tokenizers.h" | |||||
#include <lua.h> | |||||
/* Consider this value as 0 */ | /* Consider this value as 0 */ | ||||
#define ALPHA 0.0001 | #define ALPHA 0.0001 | ||||
struct rspamd_classifier_config; | struct rspamd_classifier_config; | ||||
struct rspamd_task; | struct rspamd_task; | ||||
/* Common classifier structure */ | |||||
/*
 * Classifier context: a memory pool, a hash table of results and a pointer
 * to the classifier configuration. The key/value types stored in results are
 * not visible here.
 */
struct classifier_ctx { | struct classifier_ctx { | ||||
rspamd_mempool_t *pool; | rspamd_mempool_t *pool; | ||||
GHashTable *results; | GHashTable *results; | ||||
struct rspamd_classifier_config *cfg; | struct rspamd_classifier_config *cfg; | ||||
}; | }; | ||||
/* Pairs a name string with a long double weight */
struct classify_weight { | |||||
const char *name; | |||||
long double weight; | |||||
}; | |||||
/* Common classifier structure */ | |||||
struct classifier { | struct classifier { | ||||
char *name; | char *name; | ||||
struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool, | struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool, | ||||
GError **err); | GError **err); | ||||
}; | }; | ||||
/* Get classifier structure by name or return NULL if this name is not found */ | |||||
struct classifier * rspamd_stat_get_classifier (const char *name); | |||||
/* Bayes algorithm */ | /* Bayes algorithm */ | ||||
struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, | struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, | ||||
struct rspamd_classifier_config *cf); | struct rspamd_classifier_config *cf); | ||||
gboolean is_spam, | gboolean is_spam, | ||||
lua_State *L, | lua_State *L, | ||||
GError **err); | GError **err); | ||||
/* Array of all defined classifiers */ | |||||
extern struct classifier classifiers[]; | |||||
#endif | #endif | ||||
/* | /* |
* High level statistics API | * High level statistics API | ||||
*/ | */ | ||||
/*
 * Runtime state for one statfile: a pointer to its configuration plus two
 * 64-bit hit counters.
 * NOTE(review): how hits and total_hits differ is not visible here.
 */
struct rspamd_statfile_runtime { | |||||
struct rspamd_statfile_config *st; | |||||
guint64 hits; | |||||
guint64 total_hits; | |||||
}; | |||||
/*
 * Runtime state for one classifier: ham and spam probability values, 64-bit
 * totals for spam, ham and processed tokens, and a max_tokens limit.
 * NOTE(review): field meanings are inferred from their names only - confirm
 * against the classifier code.
 */
struct rspamd_classifier_runtime { | |||||
double ham_prob; | |||||
double spam_prob; | |||||
guint64 total_spam; | |||||
guint64 total_ham; | |||||
guint64 processed_tokens; | |||||
gsize max_tokens; | |||||
}; | |||||
/*
 * Result record: a double value together with pointers to a statfile runtime
 * and a classifier runtime.
 */
struct rspamd_token_result { | |||||
double value; | |||||
struct rspamd_statfile_runtime *st_runtime; | |||||
struct rspamd_classifier_runtime *cl_runtime; | |||||
}; | |||||
/* Capacity in bytes of the data buffer embedded in every token */
#define RSPAMD_MAX_TOKEN_LEN 64 | |||||
/*
 * Token with a fixed-size byte buffer.
 * NOTE(review): datalen is presumably the number of bytes used in data, and
 * results presumably holds struct rspamd_token_result elements - confirm.
 */
typedef struct token_node_s { | |||||
guchar data[RSPAMD_MAX_TOKEN_LEN]; | |||||
guint datalen; | |||||
GArray *results; | |||||
} rspamd_token_t; | |||||
/** | /** | ||||
* Initialise statistics modules | * Initialise statistics modules | ||||
* @param cfg | * @param cfg |
#include "stat_api.h" | #include "stat_api.h" | ||||
#include "main.h" | #include "main.h" | ||||
#include "cfg_rcl.h" | #include "cfg_rcl.h" | ||||
#include "stat_internal.h" | |||||
#include "backends/mmaped_file.h" | |||||
/* File-local statistics context; starts as NULL and is not assigned anywhere in the visible code */
static struct rspamd_stat_ctx *stat_ctx = NULL; | |||||
/*
 * File-local table of classifiers. It has one entry, "bayes", wired to the
 * bayes_init, bayes_classify and bayes_learn_spam callbacks.
 */
static struct classifier classifiers[] = { | |||||
{ | |||||
.name = "bayes", | |||||
.init_func = bayes_init, | |||||
.classify_func = bayes_classify, | |||||
.learn_spam_func = bayes_learn_spam, | |||||
} | |||||
}; | |||||
/*
 * File-local table of tokenizers with a single "osb-text" entry. The
 * initialiser is positional: the name, then osb_tokenize_text and
 * rspamd_tokenizer_get_word (struct tokenizer's member names are not visible
 * here).
 */
static struct tokenizer tokenizers[] = { | |||||
{"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word}, | |||||
}; | |||||
/*
 * Table of statistics backends with a single entry named
 * RSPAMD_DEFAULT_BACKEND, initialised through rspamd_mmaped_file_init.
 * NOTE(review): unlike the classifiers and tokenizers tables in this file,
 * this one is not static - confirm whether external linkage is still needed.
 * NOTE(review): the init member takes `struct rspamd_statfile_config *`
 * while rspamd_mmaped_file_init is prototyped with `struct rspamd_config *`;
 * the pointer types look incompatible - confirm the intended signature.
 */
struct rspamd_stat_backend statfile_backends[] = { | |||||
{ | |||||
.name = RSPAMD_DEFAULT_BACKEND, | |||||
.init = rspamd_mmaped_file_init, | |||||
} | |||||
}; | |||||
/*
 * Initialise statistics modules.
 * The body is empty in this view: cfg is not read and stat_ctx is left
 * untouched.
 */
void | |||||
rspamd_stat_init (struct rspamd_config *cfg) | |||||
{ | |||||
} | |||||
/* Copyright (c) 2015, Vsevolod Stakhov | |||||
* All rights reserved. | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions are met: | |||||
* * Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* * Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY | |||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||||
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY | |||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*/ | |||||
#ifndef STAT_INTERNAL_H_ | |||||
#define STAT_INTERNAL_H_ | |||||
#include "config.h" | |||||
#include "task.h" | |||||
#include "classifiers/classifiers.h" | |||||
#include "tokenizers/tokenizers.h" | |||||
#include "backends/backends.h" | |||||
/*
 * Runtime state for one statfile: a pointer to its configuration plus two
 * 64-bit hit counters.
 * NOTE(review): how hits and total_hits differ is not visible here.
 */
struct rspamd_statfile_runtime { | |||||
struct rspamd_statfile_config *st; | |||||
guint64 hits; | |||||
guint64 total_hits; | |||||
}; | |||||
/*
 * Runtime state for one classifier: ham and spam probability values, 64-bit
 * totals for spam, ham and processed tokens, and a max_tokens limit.
 * NOTE(review): field meanings are inferred from their names only - confirm
 * against the classifier code.
 */
struct rspamd_classifier_runtime { | |||||
double ham_prob; | |||||
double spam_prob; | |||||
guint64 total_spam; | |||||
guint64 total_ham; | |||||
guint64 processed_tokens; | |||||
gsize max_tokens; | |||||
}; | |||||
/*
 * Result record: a double value together with pointers to a statfile runtime
 * and a classifier runtime.
 */
struct rspamd_token_result { | |||||
double value; | |||||
struct rspamd_statfile_runtime *st_runtime; | |||||
struct rspamd_classifier_runtime *cl_runtime; | |||||
}; | |||||
/* Capacity in bytes of the data buffer embedded in every token */
#define RSPAMD_MAX_TOKEN_LEN 64 | |||||
/*
 * Token with a fixed-size byte buffer.
 * NOTE(review): datalen is presumably the number of bytes used in data, and
 * results presumably holds struct rspamd_token_result elements - confirm.
 */
typedef struct token_node_s { | |||||
guchar data[RSPAMD_MAX_TOKEN_LEN]; | |||||
guint datalen; | |||||
GArray *results; | |||||
} rspamd_token_t; | |||||
/*
 * Statistics context: three tables (classifiers, tokenizers, backends), each
 * stored as a pointer to the first element plus an element count.
 */
struct rspamd_stat_ctx { | |||||
struct classifier *classifiers; | |||||
guint classifiers_count; | |||||
struct tokenizer *tokenizers; | |||||
guint tokenizers_count; | |||||
struct rspamd_stat_backend *backends; | |||||
guint backends_count; | |||||
}; | |||||
#endif /* STAT_INTERNAL_H_ */ |
#include "main.h" | #include "main.h" | ||||
#include "tokenizers.h" | #include "tokenizers.h" | ||||
/*
 * Table of tokenizers that rspamd_stat_get_tokenizer () searches by name.
 * Single "osb-text" entry; the initialiser is positional: the name, then
 * osb_tokenize_text and rspamd_tokenizer_get_word.
 */
struct tokenizer tokenizers[] = { | |||||
{"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word}, | |||||
}; | |||||
const int primes[] = { | const int primes[] = { | ||||
1, 7, | 1, 7, | ||||
3, 13, | 3, 13, | ||||
0, 0, 0, 0, 0 | 0, 0, 0, 0, 0 | ||||
}; | }; | ||||
struct tokenizer * | |||||
rspamd_stat_get_tokenizer (const char *name) | |||||
{ | |||||
guint i; | |||||
for (i = 0; i < sizeof (tokenizers) / sizeof (tokenizers[0]); i++) { | |||||
if (strcmp (tokenizers[i].name, name) == 0) { | |||||
return &tokenizers[i]; | |||||
} | |||||
} | |||||
return NULL; | |||||
} | |||||
int | int | ||||
token_node_compare_func (gconstpointer a, gconstpointer b) | token_node_compare_func (gconstpointer a, gconstpointer b) | ||||
{ | { |
/* Compare two token nodes */ | /* Compare two token nodes */ | ||||
int token_node_compare_func (gconstpointer a, gconstpointer b); | int token_node_compare_func (gconstpointer a, gconstpointer b); | ||||
/* Get tokenizer structure by name or return NULL if this name is not found */ | |||||
struct tokenizer * rspamd_stat_get_tokenizer (const char *name); | |||||
/* Get next word from specified f_str_t buf */ | /* Get next word from specified f_str_t buf */ | ||||
gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf, | gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf, | ||||
rspamd_fstring_t *token, GList **exceptions); | rspamd_fstring_t *token, GList **exceptions); | ||||
/* Make tokens for a subject */ | /* Make tokens for a subject */ | ||||
void tokenize_subject (struct rspamd_task *task, GTree ** tree); | void tokenize_subject (struct rspamd_task *task, GTree ** tree); | ||||
/* Array of all defined tokenizers */ | |||||
extern struct tokenizer tokenizers[]; | |||||
#endif | #endif | ||||
/* | /* | ||||
* vi:ts=4 | * vi:ts=4 |