Browse Source

Reorganize libstat API.

tags/0.9.0
Vsevolod Stakhov 9 years ago
parent
commit
8a05515078

+ 3
- 6
src/libstat/CMakeLists.txt View File

@@ -1,14 +1,11 @@
# Libstat (rspamd-stat library)
SET(LIBSTATSRC
stat_config.c)
SET(LIBSTATSRC stat_config.c)
SET(TOKENIZERSSRC tokenizers/tokenizers.c
tokenizers/osb.c)

SET(CLASSIFIERSSRC classifiers/classifiers.c
classifiers/bayes.c)
SET(CLASSIFIERSSRC classifiers/bayes.c)
SET(BACKENDSSRC backends/backends.c
backends/mmaped_file.c)
SET(BACKENDSSRC backends/mmaped_file.c)
ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC}
${TOKENIZERSSRC}

+ 0
- 50
src/libstat/backends/backends.c View File

@@ -1,50 +0,0 @@
/*
* Copyright (c) 2015, Vsevolod Stakhov
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "main.h"
#include "backends.h"
#include "mmaped_file.h"

/*
 * Table of the statistics backends defined in this file; it is searched by
 * name in rspamd_stat_get_backend(). It holds a single entry named
 * RSPAMD_DEFAULT_BACKEND whose init callback is rspamd_mmaped_file_init.
 * Any member not listed in the initializer is zero-initialised.
 */
struct rspamd_stat_backend statfile_backends[] = {
{
.name = RSPAMD_DEFAULT_BACKEND,
.init = rspamd_mmaped_file_init,
}
};


/**
 * Look up a statistics backend by name.
 *
 * Walks the statfile_backends table and compares each entry's name with
 * the requested one using strcmp().
 *
 * @param name backend name to search for; may be NULL
 * @return pointer to the matching entry inside statfile_backends, or NULL
 *         when name is NULL or no entry carries that name
 */
struct rspamd_stat_backend *
rspamd_stat_get_backend (const char *name)
{
	guint i;

	/* strcmp() has undefined behaviour for a NULL argument */
	if (name == NULL) {
		return NULL;
	}

	for (i = 0; i < G_N_ELEMENTS (statfile_backends); i++) {
		if (strcmp (statfile_backends[i].name, name) == 0) {
			return &statfile_backends[i];
		}
	}

	return NULL;
}

src/libstat/backends.h → src/libstat/backends/backends.h View File

@@ -26,18 +26,20 @@
#define BACKENDS_H_

#include "config.h"
#include "cfg_file.h"

#define RSPAMD_DEFAULT_BACKEND "mmap"

/* Forward declarations */
struct rspamd_classifier_config;
struct rspamd_statfile_config;
struct rspamd_config;

/*
 * Descriptor of a statistics backend.
 *
 * name - string identifying the backend
 * init - callback that receives a statfile configuration and returns an
 *        opaque pointer
 * ctx  - opaque pointer slot; NOTE(review): presumably holds the value
 *        returned by init - confirm
 *
 * NOTE(review): rspamd_mmaped_file_init is declared further down in this
 * header with a `struct rspamd_config *` parameter, which does not match
 * the parameter type of `init` - confirm which signature is intended.
 */
struct rspamd_stat_backend {
const char *name;
gpointer (*init)(struct rspamd_statfile_config *cfg);
gpointer ctx;
};

extern struct rspamd_stat_backend statfile_backends[];

struct rspamd_stat_backend *rspamd_stat_get_backend (const char *name);
gpointer rspamd_mmaped_file_init(struct rspamd_config *cfg);

#endif /* BACKENDS_H_ */

+ 0
- 1
src/libstat/backends/mmaped_file.c View File

@@ -24,7 +24,6 @@

#include "config.h"

#include "mmaped_file.h"
#include "main.h"

#define CHAIN_LENGTH 128

+ 0
- 20
src/libstat/backends/mmaped_file.h View File

@@ -1,20 +0,0 @@
/**
* @file mmaped_file.h
* Describes common methods for accessing statistics files and caching them in memory
*/

#ifndef RSPAMD_STATFILE_H
#define RSPAMD_STATFILE_H

#include "config.h"


/* Forward declarations */
struct rspamd_classifier_config;
struct rspamd_statfile_config;
struct rspamd_config;

gpointer
rspamd_mmaped_file_init(struct rspamd_config *cfg);

#endif

+ 0
- 56
src/libstat/classifiers/classifiers.c View File

@@ -1,56 +0,0 @@
/*
* Copyright (c) 2009-2012, Vsevolod Stakhov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
* Common classifier functions
*/

#include "classifiers.h"

/*
 * Table of all defined classifiers; it is searched by name in
 * rspamd_stat_get_classifier(). The only entry is "bayes", wired to
 * bayes_init, bayes_classify and bayes_learn_spam.
 */
struct classifier classifiers[] = {
{
.name = "bayes",
.init_func = bayes_init,
.classify_func = bayes_classify,
.learn_spam_func = bayes_learn_spam,
}
};

/**
 * Get classifier structure by name.
 *
 * Scans the classifiers table and compares each entry's name with the
 * requested one using strcmp().
 *
 * @param name classifier name to search for; may be NULL
 * @return pointer to the matching entry inside classifiers, or NULL when
 *         name is NULL or no entry carries that name
 */
struct classifier *
rspamd_stat_get_classifier (const char *name)
{
	guint i;

	/* strcmp() has undefined behaviour for a NULL argument */
	if (name == NULL) {
		return NULL;
	}

	for (i = 0; i < sizeof (classifiers) / sizeof (classifiers[0]); i++) {
		if (strcmp (classifiers[i].name, name) == 0) {
			return &classifiers[i];
		}
	}

	return NULL;
}

/*
* vi:ts=4
*/

src/libstat/classifiers.h → src/libstat/classifiers/classifiers.h View File

@@ -2,9 +2,6 @@
#define CLASSIFIERS_H

#include "config.h"
#include "mem_pool.h"
#include "tokenizers.h"
#include <lua.h>

/* Consider this value as 0 */
#define ALPHA 0.0001
@@ -12,6 +9,7 @@
struct rspamd_classifier_config;
struct rspamd_task;

/* Common classifier structure */
struct classifier_ctx {
rspamd_mempool_t *pool;
GHashTable *results;
@@ -19,12 +17,6 @@ struct classifier_ctx {
struct rspamd_classifier_config *cfg;
};

/*
 * Pairs a name with a long double weight.
 * NOTE(review): what the name refers to is not visible here - confirm
 * against the users of this structure.
 */
struct classify_weight {
const char *name;
long double weight;
};

/* Common classifier structure */
struct classifier {
char *name;
struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool,
@@ -37,9 +29,6 @@ struct classifier {
GError **err);
};

/* Get classifier structure by name or return NULL if this name is not found */
struct classifier * rspamd_stat_get_classifier (const char *name);

/* Bayes algorithm */
struct classifier_ctx * bayes_init (rspamd_mempool_t *pool,
struct rspamd_classifier_config *cf);
@@ -53,8 +42,6 @@ gboolean bayes_learn_spam (struct classifier_ctx * ctx,
gboolean is_spam,
lua_State *L,
GError **err);
/* Array of all defined classifiers */
extern struct classifier classifiers[];

#endif
/*

+ 0
- 29
src/libstat/stat_api.h View File

@@ -31,35 +31,6 @@
* High level statistics API
*/

/*
 * Runtime counters associated with a statfile configuration.
 * NOTE(review): the difference between `hits` and `total_hits` is not
 * visible here - confirm against the code that updates them.
 */
struct rspamd_statfile_runtime {
struct rspamd_statfile_config *st; /* statfile configuration these counters refer to */
guint64 hits;
guint64 total_hits;
};

/*
 * Runtime values kept for a classifier: ham and spam probabilities as
 * doubles, 64-bit ham/spam totals, a processed-token counter and a
 * max_tokens size.
 * NOTE(review): when these are updated and whether max_tokens is a hard
 * limit is not visible here - confirm.
 */
struct rspamd_classifier_runtime {
double ham_prob;
double spam_prob;
guint64 total_spam;
guint64 total_ham;
guint64 processed_tokens;
gsize max_tokens;
};

/*
 * Result record that ties a double value to a statfile runtime and a
 * classifier runtime.
 * NOTE(review): the meaning of `value` is not visible here - confirm.
 */
struct rspamd_token_result {
double value;
struct rspamd_statfile_runtime *st_runtime;

struct rspamd_classifier_runtime *cl_runtime;
};

/* Size in bytes of the fixed data buffer inside rspamd_token_t */
#define RSPAMD_MAX_TOKEN_LEN 64
/*
 * Token with a fixed-size data buffer.
 * NOTE(review): datalen is presumably the number of bytes of `data` in
 * use, and `results` presumably holds struct rspamd_token_result
 * elements - confirm both against the producers of this structure.
 */
typedef struct token_node_s {
guchar data[RSPAMD_MAX_TOKEN_LEN];
guint datalen;
GArray *results;
} rspamd_token_t;

/**
* Initialise statistics modules
* @param cfg

+ 30
- 0
src/libstat/stat_config.c View File

@@ -27,4 +27,34 @@
#include "stat_api.h"
#include "main.h"
#include "cfg_rcl.h"
#include "stat_internal.h"
#include "backends/mmaped_file.h"

static struct rspamd_stat_ctx *stat_ctx = NULL;

/*
 * File-local table of the defined classifiers. The only entry is "bayes",
 * wired to bayes_init, bayes_classify and bayes_learn_spam.
 */
static struct classifier classifiers[] = {
{
.name = "bayes",
.init_func = bayes_init,
.classify_func = bayes_classify,
.learn_spam_func = bayes_learn_spam,
}
};

/*
 * File-local table of the defined tokenizers. The only entry is
 * "osb-text", initialised positionally with osb_tokenize_text and
 * rspamd_tokenizer_get_word; the member order follows struct tokenizer,
 * whose definition is not shown in this file.
 */
static struct tokenizer tokenizers[] = {
{"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word},
};

/*
 * Table of the defined statistics backends (external linkage). It holds a
 * single entry named RSPAMD_DEFAULT_BACKEND whose init callback is
 * rspamd_mmaped_file_init. Any member not listed in the initializer is
 * zero-initialised.
 */
struct rspamd_stat_backend statfile_backends[] = {
{
.name = RSPAMD_DEFAULT_BACKEND,
.init = rspamd_mmaped_file_init,
}
};


/*
 * Initialise statistics modules.
 *
 * The body is currently empty: cfg is not used, and neither stat_ctx nor
 * the classifier/tokenizer/backend tables defined in this file are touched
 * here.
 */
void
rspamd_stat_init (struct rspamd_config *cfg)
{

}

+ 70
- 0
src/libstat/stat_internal.h View File

@@ -0,0 +1,70 @@
/* Copyright (c) 2015, Vsevolod Stakhov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef STAT_INTERNAL_H_
#define STAT_INTERNAL_H_

#include "config.h"
#include "task.h"
#include "classifiers/classifiers.h"
#include "tokenizers/tokenizers.h"
#include "backends/backends.h"

/*
 * Runtime counters associated with a statfile configuration.
 * NOTE(review): the difference between `hits` and `total_hits` is not
 * visible here - confirm against the code that updates them.
 */
struct rspamd_statfile_runtime {
struct rspamd_statfile_config *st; /* statfile configuration these counters refer to */
guint64 hits;
guint64 total_hits;
};

/*
 * Runtime values kept for a classifier: ham and spam probabilities as
 * doubles, 64-bit ham/spam totals, a processed-token counter and a
 * max_tokens size.
 * NOTE(review): when these are updated and whether max_tokens is a hard
 * limit is not visible here - confirm.
 */
struct rspamd_classifier_runtime {
double ham_prob;
double spam_prob;
guint64 total_spam;
guint64 total_ham;
guint64 processed_tokens;
gsize max_tokens;
};

/*
 * Result record that ties a double value to a statfile runtime and a
 * classifier runtime.
 * NOTE(review): the meaning of `value` is not visible here - confirm.
 */
struct rspamd_token_result {
double value;
struct rspamd_statfile_runtime *st_runtime;

struct rspamd_classifier_runtime *cl_runtime;
};

/* Size in bytes of the fixed data buffer inside rspamd_token_t */
#define RSPAMD_MAX_TOKEN_LEN 64
/*
 * Token with a fixed-size data buffer.
 * NOTE(review): datalen is presumably the number of bytes of `data` in
 * use, and `results` presumably holds struct rspamd_token_result
 * elements - confirm both against the producers of this structure.
 */
typedef struct token_node_s {
guchar data[RSPAMD_MAX_TOKEN_LEN];
guint datalen;
GArray *results;
} rspamd_token_t;

/*
 * Context of the statistics library: pointers to the classifier,
 * tokenizer and backend tables, each paired with a guint count.
 * NOTE(review): each *_count is presumably the number of elements in the
 * corresponding table - confirm once the code that fills this in exists.
 */
struct rspamd_stat_ctx {
struct classifier *classifiers;
guint classifiers_count;
struct tokenizer *tokenizers;
guint tokenizers_count;
struct rspamd_stat_backend *backends;
guint backends_count;
};

#endif /* STAT_INTERNAL_H_ */

+ 0
- 18
src/libstat/tokenizers/tokenizers.c View File

@@ -29,10 +29,6 @@
#include "main.h"
#include "tokenizers.h"

/*
 * Table of all defined tokenizers; it is searched by name in
 * rspamd_stat_get_tokenizer(). The only entry is "osb-text", initialised
 * positionally with osb_tokenize_text and rspamd_tokenizer_get_word.
 */
struct tokenizer tokenizers[] = {
{"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word},
};

const int primes[] = {
1, 7,
3, 13,
@@ -75,20 +71,6 @@ const gchar t_delimiters[255] = {
0, 0, 0, 0, 0
};

/**
 * Get tokenizer structure by name.
 *
 * Scans the tokenizers table and compares each entry's name with the
 * requested one using strcmp().
 *
 * @param name tokenizer name to search for; may be NULL
 * @return pointer to the matching entry inside tokenizers, or NULL when
 *         name is NULL or no entry carries that name
 */
struct tokenizer *
rspamd_stat_get_tokenizer (const char *name)
{
	guint i;

	/* strcmp() has undefined behaviour for a NULL argument */
	if (name == NULL) {
		return NULL;
	}

	for (i = 0; i < sizeof (tokenizers) / sizeof (tokenizers[0]); i++) {
		if (strcmp (tokenizers[i].name, name) == 0) {
			return &tokenizers[i];
		}
	}

	return NULL;
}

int
token_node_compare_func (gconstpointer a, gconstpointer b)
{

src/libstat/tokenizers.h → src/libstat/tokenizers/tokenizers.h View File

@@ -23,9 +23,6 @@ struct tokenizer {
/* Compare two token nodes */
int token_node_compare_func (gconstpointer a, gconstpointer b);

/* Get tokenizer structure by name or return NULL if this name is not found */
struct tokenizer * rspamd_stat_get_tokenizer (const char *name);

/* Get next word from specified rspamd_fstring_t buf */
gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf,
rspamd_fstring_t *token, GList **exceptions);
@@ -46,9 +43,6 @@ int osb_tokenize_text (struct tokenizer *tokenizer,
/* Make tokens for a subject */
void tokenize_subject (struct rspamd_task *task, GTree ** tree);

/* Array of all defined tokenizers */
extern struct tokenizer tokenizers[];

#endif
/*
* vi:ts=4

Loading…
Cancel
Save