Browse Source

Rework statistics runtime structures.

tags/0.9.0
Vsevolod Stakhov 9 years ago
parent
commit
8f5509c65d

+ 2
- 0
src/libserver/protocol.c View File

ucl_object_tostring (elt)); ucl_object_tostring (elt));
} }


g_assert (ucl_object_todouble (score) < 1000.0);

iter = NULL; iter = NULL;
while ((elt = ucl_iterate_object (metric, &iter, true)) != NULL) { while ((elt = ucl_iterate_object (metric, &iter, true)) != NULL) {
if (elt->type == UCL_OBJECT) { if (elt->type == UCL_OBJECT) {

+ 4
- 8
src/libstat/classifiers/bayes.c View File

static gboolean static gboolean
bayes_learn_callback (gpointer key, gpointer value, gpointer data) bayes_learn_callback (gpointer key, gpointer value, gpointer data)
{ {
token_node_t *node = key;
rspamd_token_t *node = key;
struct bayes_callback_data *cd = data; struct bayes_callback_data *cd = data;
gint c; gint c;
guint64 v; guint64 v;
bayes_classify_callback (gpointer key, gpointer value, gpointer data) bayes_classify_callback (gpointer key, gpointer value, gpointer data)
{ {


token_node_t *node = key;
rspamd_token_t *node = key;
struct bayes_callback_data *cd = data; struct bayes_callback_data *cd = data;
guint i; guint i;
struct bayes_statfile_data *cur; struct bayes_statfile_data *cur;
(value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) {
minnodes = strtol (value, NULL, 10); minnodes = strtol (value, NULL, 10);
nodes = g_tree_nnodes (input); nodes = g_tree_nnodes (input);
if (nodes > FEATURE_WINDOW_SIZE) {
nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE;
}

if (nodes < minnodes) { if (nodes < minnodes) {
return FALSE; return FALSE;
} }
(value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) {
minnodes = strtol (value, NULL, 10); minnodes = strtol (value, NULL, 10);
nodes = g_tree_nnodes (input); nodes = g_tree_nnodes (input);
if (nodes > FEATURE_WINDOW_SIZE) {
nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE;
}

if (nodes < minnodes) { if (nodes < minnodes) {
g_set_error (err, g_set_error (err,
bayes_error_quark (), /* error domain */ bayes_error_quark (), /* error domain */

+ 29
- 0
src/libstat/stat_api.h View File

* High level statistics API * High level statistics API
*/ */


/**
 * Runtime state attached to a single statfile.
 * NOTE(review): the code that fills in these counters is not visible here;
 * the per-field notes below are assumptions to be confirmed.
 */
struct rspamd_statfile_runtime {
/* Pointer to the statfile configuration this runtime entry belongs to */
struct rspamd_statfile_config *st;
/* presumably the number of token hits seen in this statfile — TODO confirm */
guint64 hits;
/* presumably the overall hit total used together with `hits` — TODO confirm */
guint64 total_hits;
};

/**
 * Runtime state for a classifier.
 * NOTE(review): no producer or consumer of this structure is visible here;
 * field semantics below are inferred from names and must be confirmed.
 */
struct rspamd_classifier_runtime {
/* presumably the accumulated ham probability — TODO confirm */
double ham_prob;
/* presumably the accumulated spam probability — TODO confirm */
double spam_prob;
/* presumably a spam-side total (64-bit counter) — TODO confirm what is counted */
guint64 total_spam;
/* presumably a ham-side total (64-bit counter) — TODO confirm what is counted */
guint64 total_ham;
/* presumably the number of tokens processed so far — TODO confirm */
guint64 processed_tokens;
/* presumably an upper bound on tokens to process — TODO confirm units and default */
gsize max_tokens;
};

/**
 * A single result value for a token, linked to the statfile runtime and
 * classifier runtime structures it refers to.
 * NOTE(review): presumably the element type stored in rspamd_token_t.results;
 * confirm against the code that fills that array.
 */
struct rspamd_token_result {
/* Result value for the token; how it is computed is not visible here */
double value;
/* Statfile runtime this result refers to */
struct rspamd_statfile_runtime *st_runtime;

/* Classifier runtime this result refers to */
struct rspamd_classifier_runtime *cl_runtime;
};

/* Capacity in bytes of the data buffer in rspamd_token_t */
#define RSPAMD_MAX_TOKEN_LEN 64
/**
 * Token produced by a tokenizer.
 * The OSB tokenizer stores two 32-bit hashes in `data` and sets `datalen`
 * to sizeof(gint32) * 2; the token comparator checks `datalen` first.
 */
typedef struct token_node_s {
/* Raw token bytes; only the first `datalen` bytes are meaningful */
guchar data[RSPAMD_MAX_TOKEN_LEN];
/* Number of bytes of `data` actually used */
guint datalen;
/* Results attached to this token; presumably struct rspamd_token_result elements — TODO confirm */
GArray *results;
} rspamd_token_t;

/** /**
* Initialise statistics modules * Initialise statistics modules
* @param cfg * @param cfg

+ 1
- 18
src/libstat/tokenizers.h View File

#include "mem_pool.h" #include "mem_pool.h"
#include "fstring.h" #include "fstring.h"
#include "main.h" #include "main.h"

/* Size for features pipe (the OSB tokenizer sizes its hash pipe array with it) */
#define FEATURE_WINDOW_SIZE 5
/* Capacity in bytes of the data buffer in token_node_t */
#define MAX_DATA_LEN 64
/* NOTE(review): no use of MAX_VALUES is visible in this chunk — confirm it is still needed */
#define MAX_VALUES 32

/**
 * Result value for a token, linked to a statfile configuration.
 * NOTE(review): field semantics are inferred from names; confirm against users.
 */
struct token_result {
/* Result value for the token; how it is computed is not visible here */
double value;
/* Statfile configuration this result refers to */
struct rspamd_statfile_config *st;
/* Pointer to a double, presumably an aggregate shared between results — TODO confirm ownership */
double *consolidated_value;
};

/**
 * Token produced by a tokenizer: a fixed-size byte buffer plus attached results.
 */
typedef struct token_node_s {
/* Raw token bytes; capacity is MAX_DATA_LEN */
guchar data[MAX_DATA_LEN];
/* Number of bytes of `data` actually used */
guint datalen;
/* Results attached to this token; presumably an array of `results_len` entries — TODO confirm */
struct token_result *results;
/* presumably the number of entries in `results` — TODO confirm */
guint results_len;
} token_node_t;
#include "stat_api.h"


/* Common tokenizer structure */ /* Common tokenizer structure */
struct tokenizer { struct tokenizer {

+ 6
- 3
src/libstat/tokenizers/osb.c View File

#include <sys/types.h> #include <sys/types.h>
#include "tokenizers.h" #include "tokenizers.h"


/* Size for features pipe */
#define FEATURE_WINDOW_SIZE 5

/* Minimum length of token */ /* Minimum length of token */
#define MIN_LEN 4 #define MIN_LEN 4


gboolean is_utf, gboolean is_utf,
GList *exceptions) GList *exceptions)
{ {
token_node_t *new = NULL;
rspamd_token_t *new = NULL;
rspamd_fstring_t *token; rspamd_fstring_t *token;
guint32 hashpipe[FEATURE_WINDOW_SIZE], h1, h2; guint32 hashpipe[FEATURE_WINDOW_SIZE], h1, h2;
gint i, processed = 0; gint i, processed = 0;
h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1]; h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1];
h2 = hashpipe[0] * primes[1] + hashpipe[i] * h2 = hashpipe[0] * primes[1] + hashpipe[i] *
primes[(i << 1) - 1]; primes[(i << 1) - 1];
new = rspamd_mempool_alloc0 (pool, sizeof (token_node_t));
new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_token_t));
new->datalen = sizeof(gint32) * 2; new->datalen = sizeof(gint32) * 2;
memcpy(new->data, &h1, sizeof(h1)); memcpy(new->data, &h1, sizeof(h1));
memcpy(new->data + sizeof(h1), &h2, sizeof(h2)); memcpy(new->data + sizeof(h1), &h2, sizeof(h2));
for (i = 1; i < processed; i++) { for (i = 1; i < processed; i++) {
h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1]; h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1];
h2 = hashpipe[0] * primes[1] + hashpipe[i] * primes[(i << 1) - 1]; h2 = hashpipe[0] * primes[1] + hashpipe[i] * primes[(i << 1) - 1];
new = rspamd_mempool_alloc0 (pool, sizeof (token_node_t));
new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_token_t));
new->datalen = sizeof(gint32) * 2; new->datalen = sizeof(gint32) * 2;
memcpy(new->data, &h1, sizeof(h1)); memcpy(new->data, &h1, sizeof(h1));
memcpy(new->data + sizeof(h1), &h2, sizeof(h2)); memcpy(new->data + sizeof(h1), &h2, sizeof(h2));

+ 1
- 1
src/libstat/tokenizers/tokenizers.c View File

int int
token_node_compare_func (gconstpointer a, gconstpointer b) token_node_compare_func (gconstpointer a, gconstpointer b)
{ {
const token_node_t *aa = a, *bb = b;
const rspamd_token_t *aa = a, *bb = b;


if (aa->datalen != bb->datalen) { if (aa->datalen != bb->datalen) {
return aa->datalen - bb->datalen; return aa->datalen - bb->datalen;

Loading…
Cancel
Save