@@ -773,6 +773,8 @@ rspamd_ucl_tolegacy_output (struct rspamd_task *task, | |||
ucl_object_tostring (elt)); | |||
} | |||
g_assert (ucl_object_todouble (score) < 1000.0); | |||
iter = NULL; | |||
while ((elt = ucl_iterate_object (metric, &iter, true)) != NULL) { | |||
if (elt->type == UCL_OBJECT) { |
@@ -64,7 +64,7 @@ struct bayes_callback_data { | |||
static gboolean | |||
bayes_learn_callback (gpointer key, gpointer value, gpointer data) | |||
{ | |||
token_node_t *node = key; | |||
rspamd_token_t *node = key; | |||
struct bayes_callback_data *cd = data; | |||
gint c; | |||
guint64 v; | |||
@@ -144,7 +144,7 @@ static gboolean | |||
bayes_classify_callback (gpointer key, gpointer value, gpointer data) | |||
{ | |||
token_node_t *node = key; | |||
rspamd_token_t *node = key; | |||
struct bayes_callback_data *cd = data; | |||
guint i; | |||
struct bayes_statfile_data *cur; | |||
@@ -222,9 +222,7 @@ bayes_classify (struct classifier_ctx * ctx, | |||
(value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { | |||
minnodes = strtol (value, NULL, 10); | |||
nodes = g_tree_nnodes (input); | |||
if (nodes > FEATURE_WINDOW_SIZE) { | |||
nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE; | |||
} | |||
if (nodes < minnodes) { | |||
return FALSE; | |||
} | |||
@@ -331,9 +329,7 @@ bayes_learn_spam (struct classifier_ctx * ctx, | |||
(value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) { | |||
minnodes = strtol (value, NULL, 10); | |||
nodes = g_tree_nnodes (input); | |||
if (nodes > FEATURE_WINDOW_SIZE) { | |||
nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE; | |||
} | |||
if (nodes < minnodes) { | |||
g_set_error (err, | |||
bayes_error_quark (), /* error domain */ |
@@ -31,6 +31,35 @@ | |||
* High level statistics API | |||
*/ | |||
struct rspamd_statfile_runtime { | |||
struct rspamd_statfile_config *st; | |||
guint64 hits; | |||
guint64 total_hits; | |||
}; | |||
struct rspamd_classifier_runtime { | |||
double ham_prob; | |||
double spam_prob; | |||
guint64 total_spam; | |||
guint64 total_ham; | |||
guint64 processed_tokens; | |||
gsize max_tokens; | |||
}; | |||
/*
 * One result entry for a token: a numeric value together with pointers to
 * the statfile runtime and classifier runtime records it is associated with.
 */
struct rspamd_token_result {
	double value;
	struct rspamd_statfile_runtime *st_runtime;
	struct rspamd_classifier_runtime *cl_runtime;
};
#define RSPAMD_MAX_TOKEN_LEN 64 | |||
typedef struct token_node_s { | |||
guchar data[RSPAMD_MAX_TOKEN_LEN]; | |||
guint datalen; | |||
GArray *results; | |||
} rspamd_token_t; | |||
/** | |||
* Initialise statistics modules | |||
* @param cfg |
@@ -5,24 +5,7 @@ | |||
#include "mem_pool.h" | |||
#include "fstring.h" | |||
#include "main.h" | |||
/* Size for features pipe */
#define FEATURE_WINDOW_SIZE 5
/* Capacity in bytes of a token's data buffer */
#define MAX_DATA_LEN 64
/* NOTE(review): not referenced by any visible code - confirm it is still needed */
#define MAX_VALUES 32
/*
 * Result entry for a token: a numeric value, the statfile configuration it
 * relates to, and a pointer to a double.
 */
struct token_result {
	double value;
	struct rspamd_statfile_config *st;
	/* NOTE(review): presumably points at a shared aggregate to update - confirm */
	double *consolidated_value;
};
typedef struct token_node_s { | |||
guchar data[MAX_DATA_LEN]; | |||
guint datalen; | |||
struct token_result *results; | |||
guint results_len; | |||
} token_node_t; | |||
#include "stat_api.h" | |||
/* Common tokenizer structure */ | |||
struct tokenizer { |
@@ -29,6 +29,9 @@ | |||
#include <sys/types.h> | |||
#include "tokenizers.h" | |||
/* Size for features pipe; also the length of the tokenizer's hashpipe array */
#define FEATURE_WINDOW_SIZE 5
/* Minimum length of token */
#define MIN_LEN 4
@@ -43,7 +46,7 @@ osb_tokenize_text (struct tokenizer *tokenizer, | |||
gboolean is_utf, | |||
GList *exceptions) | |||
{ | |||
token_node_t *new = NULL; | |||
rspamd_token_t *new = NULL; | |||
rspamd_fstring_t *token; | |||
guint32 hashpipe[FEATURE_WINDOW_SIZE], h1, h2; | |||
gint i, processed = 0; | |||
@@ -82,7 +85,7 @@ osb_tokenize_text (struct tokenizer *tokenizer, | |||
h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1]; | |||
h2 = hashpipe[0] * primes[1] + hashpipe[i] * | |||
primes[(i << 1) - 1]; | |||
new = rspamd_mempool_alloc0 (pool, sizeof (token_node_t)); | |||
new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_token_t)); | |||
new->datalen = sizeof(gint32) * 2; | |||
memcpy(new->data, &h1, sizeof(h1)); | |||
memcpy(new->data + sizeof(h1), &h2, sizeof(h2)); | |||
@@ -98,7 +101,7 @@ osb_tokenize_text (struct tokenizer *tokenizer, | |||
for (i = 1; i < processed; i++) { | |||
h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1]; | |||
h2 = hashpipe[0] * primes[1] + hashpipe[i] * primes[(i << 1) - 1]; | |||
new = rspamd_mempool_alloc0 (pool, sizeof (token_node_t)); | |||
new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_token_t)); | |||
new->datalen = sizeof(gint32) * 2; | |||
memcpy(new->data, &h1, sizeof(h1)); | |||
memcpy(new->data + sizeof(h1), &h2, sizeof(h2)); |
@@ -92,7 +92,7 @@ rspamd_stat_get_tokenizer (const char *name) | |||
int | |||
token_node_compare_func (gconstpointer a, gconstpointer b) | |||
{ | |||
const token_node_t *aa = a, *bb = b; | |||
const rspamd_token_t *aa = a, *bb = b; | |||
if (aa->datalen != bb->datalen) { | |||
return aa->datalen - bb->datalen; |