#include "config.h"
#include "mem_pool.h"
-#include "statfile.h"
#include "tokenizers.h"
#include <lua.h>
struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool,
struct rspamd_classifier_config *cf);
gboolean (*classify_func)(struct classifier_ctx * ctx,
- statfile_pool_t *pool, GTree *input, struct rspamd_task *task,
+ GTree *input, struct rspamd_task *task,
lua_State *L);
- gboolean (*learn_func)(struct classifier_ctx * ctx, statfile_pool_t *pool,
- const char *symbol, GTree *input, gboolean in_class,
- double *sum, double multiplier, GError **err);
gboolean (*learn_spam_func)(struct classifier_ctx * ctx,
- statfile_pool_t *pool,
GTree *input, struct rspamd_task *task, gboolean is_spam, lua_State *L,
GError **err);
- GList * (*weights_func)(struct classifier_ctx * ctx, statfile_pool_t *pool,
- GTree *input, struct rspamd_task *task);
};
/* Get classifier structure by name or return NULL if this name is not found */
/*
 * Bayes classifier constructor. The signature matches the init_func
 * callback type: it receives a memory pool and the classifier
 * configuration and returns a classifier context pointer.
 * NOTE(review): presumably the context is allocated from `pool` and a NULL
 * return signals failure - confirm against the definition.
 */
struct classifier_ctx * bayes_init (rspamd_mempool_t *pool,
struct rspamd_classifier_config *cf);
/*
 * Bayes implementation of the classify_func callback.
 * Arguments: the classifier context, a GTree with the input to classify,
 * the task being processed and a Lua state.
 * NOTE(review): `input` presumably holds the tokens produced by the
 * tokenizer, and the gboolean result presumably reports success rather
 * than the verdict itself - confirm against the definition.
 */
gboolean bayes_classify (struct classifier_ctx * ctx,
- statfile_pool_t *pool,
GTree *input,
struct rspamd_task *task,
lua_State *L);
-gboolean bayes_learn (struct classifier_ctx * ctx,
- statfile_pool_t *pool,
- const char *symbol,
- GTree *input,
- gboolean in_class,
- double *sum,
- double multiplier,
- GError **err);
/*
 * Bayes implementation of the learn_spam_func callback.
 * Arguments: the classifier context, a GTree with the input to learn from,
 * the task being processed, a spam/ham flag, a Lua state and a GError
 * out-parameter.
 * NOTE(review): presumably `is_spam` selects which class the input is
 * learned into, and on a FALSE return `err` describes the failure (GLib
 * GError convention) - confirm against the definition.
 */
gboolean bayes_learn_spam (struct classifier_ctx * ctx,
- statfile_pool_t *pool,
GTree *input,
struct rspamd_task *task,
gboolean is_spam,
lua_State *L,
GError **err);
-GList * bayes_weights (struct classifier_ctx * ctx,
- statfile_pool_t *pool,
- GTree *input,
- struct rspamd_task *task);
/* Array of all defined classifiers */
extern struct classifier classifiers[];
return TRUE;
}
-gboolean
-bayes_learn (struct classifier_ctx * ctx,
- statfile_pool_t *pool,
- const char *symbol,
- GTree *input,
- gboolean in_class,
- double *sum,
- double multiplier,
- GError **err)
-{
- struct bayes_callback_data data;
- gchar *value;
- gint nodes;
- gint minnodes;
- struct rspamd_statfile_config *st, *sel_st = NULL;
- stat_file_t *to_learn;
- GList *cur;
-
- g_assert (pool != NULL);
- g_assert (ctx != NULL);
-
- if (ctx->cfg->opts &&
- (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) {
- minnodes = strtol (value, NULL, 10);
- nodes = g_tree_nnodes (input);
- if (nodes > FEATURE_WINDOW_SIZE) {
- nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE;
- }
- if (nodes < minnodes) {
- msg_info (
- "do not learn message as it has too few tokens: %d, while %d min",
- nodes,
- minnodes);
- *sum = 0;
- g_set_error (err,
- bayes_error_quark (), /* error domain */
- 1, /* error code */
- "message contains too few tokens: %d, while min is %d",
- nodes, (int)minnodes);
- return FALSE;
- }
- }
-
- data.pool = pool;
- data.in_class = in_class;
- data.now = time (NULL);
- data.ctx = ctx;
- data.processed_tokens = 0;
- data.processed_tokens = 0;
- if (ctx->cfg->opts &&
- (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) {
- minnodes = rspamd_config_parse_limit (value, -1);
- data.max_tokens = minnodes;
- }
- else {
- data.max_tokens = 0;
- }
- cur = ctx->cfg->statfiles;
- while (cur) {
- /* Select statfile to learn */
- st = cur->data;
- if (strcmp (st->symbol, symbol) == 0) {
- sel_st = st;
- break;
- }
- cur = g_list_next (cur);
- }
- if (sel_st == NULL) {
- g_set_error (err,
- bayes_error_quark (), /* error domain */
- 1, /* error code */
- "cannot find statfile for symbol: %s",
- symbol);
- return FALSE;
- }
- if ((to_learn = statfile_pool_is_open (pool, sel_st->path)) == NULL) {
- if ((to_learn =
- statfile_pool_open (pool, sel_st->path, sel_st->size,
- FALSE)) == NULL) {
- msg_warn ("cannot open %s", sel_st->path);
- if (statfile_pool_create (pool, sel_st->path, sel_st->size) == -1) {
- msg_err ("cannot create statfile %s", sel_st->path);
- g_set_error (err,
- bayes_error_quark (), /* error domain */
- 1, /* error code */
- "cannot create statfile: %s",
- sel_st->path);
- return FALSE;
- }
- if ((to_learn =
- statfile_pool_open (pool, sel_st->path, sel_st->size,
- FALSE)) == NULL) {
- g_set_error (err,
- bayes_error_quark (), /* error domain */
- 1, /* error code */
- "cannot open statfile %s after creation",
- sel_st->path);
- msg_err ("cannot open statfile %s after creation",
- sel_st->path);
- return FALSE;
- }
- }
- }
- data.file = to_learn;
- statfile_pool_lock_file (pool, data.file);
- g_tree_foreach (input, bayes_learn_callback, &data);
- statfile_inc_revision (to_learn);
- statfile_pool_unlock_file (pool, data.file);
-
- if (sum != NULL) {
- *sum = data.processed_tokens;
- }
-
- return TRUE;
-}
-
gboolean
bayes_learn_spam (struct classifier_ctx * ctx,
statfile_pool_t *pool,
return TRUE;
}
-
-GList *
-bayes_weights (struct classifier_ctx * ctx,
- statfile_pool_t *pool,
- GTree *input,
- struct rspamd_task *task)
-{
- /* This function is unimplemented with new normalizer */
- return NULL;
-}
#include "libutil/logger.h"
#include "libutil/http.h"
#include "libutil/upstream.h"
-#include "libserver/statfile.h"
#include "libserver/url.h"
#include "libserver/protocol.h"
#include "libserver/buffer.h"
struct rspamd_stat *stat; /**< pointer to statistics */
rspamd_mempool_t *server_pool; /**< server's memory pool */
- statfile_pool_t *statfile_pool; /**< shared statfiles pool */
GHashTable *workers; /**< workers pool indexed by pid */
rspamd_logger_t *logger;
uid_t workers_uid; /**< worker's uid running to */