Browse Source

Reorganize libstat API.

tags/0.9.0
Vsevolod Stakhov 9 years ago
parent
commit
8a05515078

+ 3
- 6
src/libstat/CMakeLists.txt View File

# Librspamdserver # Librspamdserver
SET(LIBSTATSRC
stat_config.c)
SET(LIBSTATSRC stat_config.c)
SET(TOKENIZERSSRC tokenizers/tokenizers.c SET(TOKENIZERSSRC tokenizers/tokenizers.c
tokenizers/osb.c) tokenizers/osb.c)


SET(CLASSIFIERSSRC classifiers/classifiers.c
classifiers/bayes.c)
SET(CLASSIFIERSSRC classifiers/bayes.c)
SET(BACKENDSSRC backends/backends.c
backends/mmaped_file.c)
SET(BACKENDSSRC backends/mmaped_file.c)
ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC} ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC}
${TOKENIZERSSRC} ${TOKENIZERSSRC}

+ 0
- 50
src/libstat/backends/backends.c View File

/*
* Copyright (c) 2015, Vsevolod Stakhov
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "main.h"
#include "backends.h"
#include "mmaped_file.h"

/*
 * Table of available statistics backends.
 * It holds a single entry named RSPAMD_DEFAULT_BACKEND whose init callback
 * is rspamd_mmaped_file_init; the `ctx` member is not set here.
 * NOTE(review): backends.h declares the `init` member as taking
 * `struct rspamd_statfile_config *`, whereas rspamd_mmaped_file_init is
 * declared with `struct rspamd_config *` - confirm which one is intended.
 */
struct rspamd_stat_backend statfile_backends[] = {
{
.name = RSPAMD_DEFAULT_BACKEND,
.init = rspamd_mmaped_file_init,
}
};


/**
 * Look up a statistics backend by name.
 *
 * Performs a linear scan of statfile_backends, comparing names with
 * strcmp() (exact, case-sensitive match).
 *
 * @param name backend name to search for; may be NULL
 * @return pointer to the matching entry of statfile_backends, or NULL when
 * `name` is NULL or no entry carries that name
 */
struct rspamd_stat_backend *
rspamd_stat_get_backend (const char *name)
{
	guint i;

	/* strcmp() on a NULL pointer is undefined behaviour, reject it up front */
	if (name == NULL) {
		return NULL;
	}

	for (i = 0; i < G_N_ELEMENTS (statfile_backends); i++) {
		if (strcmp (statfile_backends[i].name, name) == 0) {
			return &statfile_backends[i];
		}
	}

	return NULL;
}

src/libstat/backends.h → src/libstat/backends/backends.h View File

#define BACKENDS_H_ #define BACKENDS_H_


#include "config.h" #include "config.h"
#include "cfg_file.h"


#define RSPAMD_DEFAULT_BACKEND "mmap" #define RSPAMD_DEFAULT_BACKEND "mmap"


/* Forward declarations */
struct rspamd_classifier_config;
struct rspamd_statfile_config;
struct rspamd_config;

struct rspamd_stat_backend { struct rspamd_stat_backend {
const char *name; const char *name;
gpointer (*init)(struct rspamd_statfile_config *cfg); gpointer (*init)(struct rspamd_statfile_config *cfg);
gpointer ctx; gpointer ctx;
}; };


extern struct rspamd_stat_backend statfile_backends[];

struct rspamd_stat_backend *rspamd_stat_get_backend (const char *name);
gpointer rspamd_mmaped_file_init(struct rspamd_config *cfg);


#endif /* BACKENDS_H_ */ #endif /* BACKENDS_H_ */

+ 0
- 1
src/libstat/backends/mmaped_file.c View File



#include "config.h" #include "config.h"


#include "mmaped_file.h"
#include "main.h" #include "main.h"


#define CHAIN_LENGTH 128 #define CHAIN_LENGTH 128

+ 0
- 20
src/libstat/backends/mmaped_file.h View File

/**
* @file mmaped_file.h
* Describes common methods for accessing statistics files and caching them in memory
*/

#ifndef RSPAMD_STATFILE_H
#define RSPAMD_STATFILE_H

#include "config.h"


/* Forward declarations */
struct rspamd_classifier_config;
struct rspamd_statfile_config;
struct rspamd_config;

/**
 * Initialisation entry point of the mmapped-file statistics backend.
 * @param cfg configuration object
 * @return opaque pointer.
 * NOTE(review): presumably the backend context - confirm against the
 * definition in mmaped_file.c.
 */
gpointer
rspamd_mmaped_file_init(struct rspamd_config *cfg);

#endif

+ 0
- 56
src/libstat/classifiers/classifiers.c View File

/*
* Copyright (c) 2009-2012, Vsevolod Stakhov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
* Common classifier functions
*/

#include "classifiers.h"

/*
 * Table of available classifiers.
 * The only entry is "bayes", wired to the bayes_init, bayes_classify and
 * bayes_learn_spam callbacks.
 */
struct classifier classifiers[] = {
{
.name = "bayes",
.init_func = bayes_init,
.classify_func = bayes_classify,
.learn_spam_func = bayes_learn_spam,
}
};

/**
 * Look up a classifier by name.
 *
 * Performs a linear scan of the classifiers table, comparing names with
 * strcmp() (exact, case-sensitive match).
 *
 * @param name classifier name to search for; may be NULL
 * @return pointer to the matching entry of classifiers, or NULL when
 * `name` is NULL or no entry carries that name
 */
struct classifier *
rspamd_stat_get_classifier (const char *name)
{
	guint i;

	/* strcmp() on a NULL pointer is undefined behaviour, reject it up front */
	if (name == NULL) {
		return NULL;
	}

	for (i = 0; i < sizeof (classifiers) / sizeof (classifiers[0]); i++) {
		if (strcmp (classifiers[i].name, name) == 0) {
			return &classifiers[i];
		}
	}

	return NULL;
}

/*
* vi:ts=4
*/

src/libstat/classifiers.h → src/libstat/classifiers/classifiers.h View File

#define CLASSIFIERS_H #define CLASSIFIERS_H


#include "config.h" #include "config.h"
#include "mem_pool.h"
#include "tokenizers.h"
#include <lua.h>


/* Consider this value as 0 */ /* Consider this value as 0 */
#define ALPHA 0.0001 #define ALPHA 0.0001
struct rspamd_classifier_config; struct rspamd_classifier_config;
struct rspamd_task; struct rspamd_task;


/* Common classifier structure */
struct classifier_ctx { struct classifier_ctx {
rspamd_mempool_t *pool; rspamd_mempool_t *pool;
GHashTable *results; GHashTable *results;
struct rspamd_classifier_config *cfg; struct rspamd_classifier_config *cfg;
}; };


struct classify_weight {
const char *name;
long double weight;
};

/* Common classifier structure */
struct classifier { struct classifier {
char *name; char *name;
struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool, struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool,
GError **err); GError **err);
}; };


/* Get classifier structure by name or return NULL if this name is not found */
struct classifier * rspamd_stat_get_classifier (const char *name);

/* Bayes algorithm */ /* Bayes algorithm */
struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, struct classifier_ctx * bayes_init (rspamd_mempool_t *pool,
struct rspamd_classifier_config *cf); struct rspamd_classifier_config *cf);
gboolean is_spam, gboolean is_spam,
lua_State *L, lua_State *L,
GError **err); GError **err);
/* Array of all defined classifiers */
extern struct classifier classifiers[];


#endif #endif
/* /*

+ 0
- 29
src/libstat/stat_api.h View File

* High level statistics API * High level statistics API
*/ */


/*
 * Per-statfile runtime record: a pointer to a statfile configuration plus
 * two 64-bit counters.
 * NOTE(review): the exact meaning of `hits` versus `total_hits` is not
 * visible here - confirm against the code that updates them.
 */
struct rspamd_statfile_runtime {
struct rspamd_statfile_config *st;
guint64 hits;
guint64 total_hits;
};

/*
 * Per-classifier runtime record: ham/spam probabilities, ham/spam totals,
 * a processed-token counter and a token limit.
 * NOTE(review): units and update rules of these fields are not visible
 * here (e.g. whether max_tokens == 0 means "unlimited") - confirm at the
 * point of use.
 */
struct rspamd_classifier_runtime {
double ham_prob;
double spam_prob;
guint64 total_spam;
guint64 total_ham;
guint64 processed_tokens;
gsize max_tokens;
};

/*
 * Result attached to a token: a numeric value together with pointers to a
 * statfile runtime record and a classifier runtime record.
 * NOTE(review): presumably `value` is the token's weight in the referenced
 * statfile - confirm against the backend code.
 */
struct rspamd_token_result {
double value;
struct rspamd_statfile_runtime *st_runtime;

struct rspamd_classifier_runtime *cl_runtime;
};

/* Size in bytes of the fixed `data` buffer embedded in every rspamd_token_t */
#define RSPAMD_MAX_TOKEN_LEN 64
/*
 * A single token: a fixed-size byte buffer, a length field and a GArray of
 * results.
 * NOTE(review): `datalen` is presumably the number of used bytes in `data`
 * and `results` presumably stores struct rspamd_token_result elements -
 * confirm against the tokenizer code.
 */
typedef struct token_node_s {
guchar data[RSPAMD_MAX_TOKEN_LEN];
guint datalen;
GArray *results;
} rspamd_token_t;

/** /**
* Initialise statistics modules * Initialise statistics modules
* @param cfg * @param cfg

+ 30
- 0
src/libstat/stat_config.c View File

#include "stat_api.h" #include "stat_api.h"
#include "main.h" #include "main.h"
#include "cfg_rcl.h" #include "cfg_rcl.h"
#include "stat_internal.h"
#include "backends/mmaped_file.h"


/*
 * File-local statistics context; starts as NULL.
 * NOTE(review): no assignment to it is visible in this part of the file.
 */
static struct rspamd_stat_ctx *stat_ctx = NULL;

/*
 * File-local table of available classifiers.
 * The only entry is "bayes", wired to the bayes_init, bayes_classify and
 * bayes_learn_spam callbacks.
 */
static struct classifier classifiers[] = {
{
.name = "bayes",
.init_func = bayes_init,
.classify_func = bayes_classify,
.learn_spam_func = bayes_learn_spam,
}
};

/*
 * File-local table of available tokenizers.
 * The only entry is "osb-text", initialised positionally with
 * osb_tokenize_text and rspamd_tokenizer_get_word.
 */
static struct tokenizer tokenizers[] = {
{"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word},
};

/*
 * Table of available statistics backends.
 * It holds a single entry named RSPAMD_DEFAULT_BACKEND whose init callback
 * is rspamd_mmaped_file_init; the `ctx` member is not set here.
 * NOTE(review): unlike classifiers[] and tokenizers[] this table is not
 * `static` - confirm whether external linkage is still required.
 * NOTE(review): backends.h declares the `init` member as taking
 * `struct rspamd_statfile_config *`, whereas rspamd_mmaped_file_init is
 * declared with `struct rspamd_config *` - confirm which one is intended.
 */
struct rspamd_stat_backend statfile_backends[] = {
{
.name = RSPAMD_DEFAULT_BACKEND,
.init = rspamd_mmaped_file_init,
}
};


/**
 * Initialise statistics modules.
 * Currently an empty stub: the body does nothing and `cfg` is unused.
 * @param cfg rspamd configuration (not used yet)
 */
void
rspamd_stat_init (struct rspamd_config *cfg)
{

}

+ 70
- 0
src/libstat/stat_internal.h View File

/* Copyright (c) 2015, Vsevolod Stakhov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef STAT_INTERNAL_H_
#define STAT_INTERNAL_H_

#include "config.h"
#include "task.h"
#include "classifiers/classifiers.h"
#include "tokenizers/tokenizers.h"
#include "backends/backends.h"

/*
 * Per-statfile runtime record: a pointer to a statfile configuration plus
 * two 64-bit counters.
 * NOTE(review): the exact meaning of `hits` versus `total_hits` is not
 * visible here - confirm against the code that updates them.
 */
struct rspamd_statfile_runtime {
struct rspamd_statfile_config *st;
guint64 hits;
guint64 total_hits;
};

/*
 * Per-classifier runtime record: ham/spam probabilities, ham/spam totals,
 * a processed-token counter and a token limit.
 * NOTE(review): units and update rules of these fields are not visible
 * here (e.g. whether max_tokens == 0 means "unlimited") - confirm at the
 * point of use.
 */
struct rspamd_classifier_runtime {
double ham_prob;
double spam_prob;
guint64 total_spam;
guint64 total_ham;
guint64 processed_tokens;
gsize max_tokens;
};

/*
 * Result attached to a token: a numeric value together with pointers to a
 * statfile runtime record and a classifier runtime record.
 * NOTE(review): presumably `value` is the token's weight in the referenced
 * statfile - confirm against the backend code.
 */
struct rspamd_token_result {
double value;
struct rspamd_statfile_runtime *st_runtime;

struct rspamd_classifier_runtime *cl_runtime;
};

/* Size in bytes of the fixed `data` buffer embedded in every rspamd_token_t */
#define RSPAMD_MAX_TOKEN_LEN 64
/*
 * A single token: a fixed-size byte buffer, a length field and a GArray of
 * results.
 * NOTE(review): `datalen` is presumably the number of used bytes in `data`
 * and `results` presumably stores struct rspamd_token_result elements -
 * confirm against the tokenizer code.
 */
typedef struct token_node_s {
guchar data[RSPAMD_MAX_TOKEN_LEN];
guint datalen;
GArray *results;
} rspamd_token_t;

/*
 * Aggregated libstat context: pointer/count pairs for classifiers,
 * tokenizers and backends.
 * NOTE(review): presumably each pointer refers to the matching table in
 * stat_config.c and each *_count is that table's length; nothing in this
 * header sets them - confirm at the initialisation site.
 */
struct rspamd_stat_ctx {
struct classifier *classifiers;
guint classifiers_count;
struct tokenizer *tokenizers;
guint tokenizers_count;
struct rspamd_stat_backend *backends;
guint backends_count;
};

#endif /* STAT_INTERNAL_H_ */

+ 0
- 18
src/libstat/tokenizers/tokenizers.c View File

#include "main.h" #include "main.h"
#include "tokenizers.h" #include "tokenizers.h"


/*
 * Table of available tokenizers.
 * The only entry is "osb-text", initialised positionally with
 * osb_tokenize_text and rspamd_tokenizer_get_word.
 */
struct tokenizer tokenizers[] = {
{"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word},
};

const int primes[] = { const int primes[] = {
1, 7, 1, 7,
3, 13, 3, 13,
0, 0, 0, 0, 0 0, 0, 0, 0, 0
}; };


/**
 * Look up a tokenizer by name.
 *
 * Performs a linear scan of the tokenizers table, comparing names with
 * strcmp() (exact, case-sensitive match).
 *
 * @param name tokenizer name to search for; may be NULL
 * @return pointer to the matching entry of tokenizers, or NULL when
 * `name` is NULL or no entry carries that name
 */
struct tokenizer *
rspamd_stat_get_tokenizer (const char *name)
{
	guint i;

	/* strcmp() on a NULL pointer is undefined behaviour, reject it up front */
	if (name == NULL) {
		return NULL;
	}

	for (i = 0; i < sizeof (tokenizers) / sizeof (tokenizers[0]); i++) {
		if (strcmp (tokenizers[i].name, name) == 0) {
			return &tokenizers[i];
		}
	}

	return NULL;
}

int int
token_node_compare_func (gconstpointer a, gconstpointer b) token_node_compare_func (gconstpointer a, gconstpointer b)
{ {

src/libstat/tokenizers.h → src/libstat/tokenizers/tokenizers.h View File

/* Compare two token nodes */ /* Compare two token nodes */
int token_node_compare_func (gconstpointer a, gconstpointer b); int token_node_compare_func (gconstpointer a, gconstpointer b);


/* Get tokenizer structure by name or return NULL if this name is not found */
struct tokenizer * rspamd_stat_get_tokenizer (const char *name);

/* Get next word from specified f_str_t buf */ /* Get next word from specified f_str_t buf */
gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf, gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf,
rspamd_fstring_t *token, GList **exceptions); rspamd_fstring_t *token, GList **exceptions);
/* Make tokens for a subject */ /* Make tokens for a subject */
void tokenize_subject (struct rspamd_task *task, GTree ** tree); void tokenize_subject (struct rspamd_task *task, GTree ** tree);


/* Array of all defined tokenizers */
extern struct tokenizer tokenizers[];

#endif #endif
/* /*
* vi:ts=4 * vi:ts=4

Loading…
Cancel
Save