9 年前 · 8f5509c65d
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -773,6 +773,8 @@ rspamd_ucl_tolegacy_output (struct rspamd_task *task,
 				ucl_object_tostring (elt));
 		}

 		g_assert (ucl_object_todouble (score) < 1000.0);

 		iter = NULL;
 		while ((elt = ucl_iterate_object (metric, &iter, true)) != NULL) {
 			if (elt->type == UCL_OBJECT) {
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -64,7 +64,7 @@ struct bayes_callback_data {
 static gboolean
 bayes_learn_callback (gpointer key, gpointer value, gpointer data)
 {
 	token_node_t *node = key;
 	rspamd_token_t *node = key;
 	struct bayes_callback_data *cd = data;
 	gint c;
 	guint64 v;
@@ -144,7 +144,7 @@ static gboolean
 bayes_classify_callback (gpointer key, gpointer value, gpointer data)
 {

 	token_node_t *node = key;
 	rspamd_token_t *node = key;
 	struct bayes_callback_data *cd = data;
 	guint i;
 	struct bayes_statfile_data *cur;
@@ -222,9 +222,7 @@ bayes_classify (struct classifier_ctx * ctx,
 		(value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) {
 		minnodes = strtol (value, NULL, 10);
 		nodes = g_tree_nnodes (input);
 		if (nodes > FEATURE_WINDOW_SIZE) {
 			nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE;
 		}

 		if (nodes < minnodes) {
 			return FALSE;
 		}
@@ -331,9 +329,7 @@ bayes_learn_spam (struct classifier_ctx * ctx,
 		(value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) {
 		minnodes = strtol (value, NULL, 10);
 		nodes = g_tree_nnodes (input);
 		if (nodes > FEATURE_WINDOW_SIZE) {
 			nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE;
 		}

 		if (nodes < minnodes) {
 			g_set_error (err,
 				bayes_error_quark (),           /* error domain */
--- a/src/libstat/stat_api.h
+++ b/src/libstat/stat_api.h
@@ -31,6 +31,35 @@
 * High level statistics API
 */

 struct rspamd_statfile_runtime { /* Per-statfile runtime state: config pointer plus two hit counters */
 	struct rspamd_statfile_config *st; /* configuration of the statfile this entry refers to */
 	guint64 hits; /* NOTE(review): presumably hits seen for the current task - confirm */
 	guint64 total_hits; /* NOTE(review): presumably cumulative hits for the statfile - confirm */
 };

 struct rspamd_classifier_runtime { /* Per-classifier runtime accumulators */
 	double ham_prob; /* NOTE(review): presumably accumulated ham probability - confirm range/scale */
 	double spam_prob; /* NOTE(review): presumably accumulated spam probability - confirm range/scale */
 	guint64 total_spam; /* NOTE(review): presumably total spam count used by the classifier - confirm */
 	guint64 total_ham; /* NOTE(review): presumably total ham count used by the classifier - confirm */
 	guint64 processed_tokens; /* NOTE(review): presumably number of tokens handled so far - confirm */
 	gsize max_tokens; /* NOTE(review): presumably an upper bound on tokens to process - confirm */
 };

 struct rspamd_token_result { /* One value for a token, linked to statfile and classifier runtimes */
 	double value; /* NOTE(review): presumably the token's weight in the referenced statfile - confirm */
 	struct rspamd_statfile_runtime *st_runtime; /* statfile runtime this result belongs to */

 	struct rspamd_classifier_runtime *cl_runtime; /* classifier runtime this result belongs to */
 };

 #define RSPAMD_MAX_TOKEN_LEN 64 /* capacity in bytes of rspamd_token_t.data */
 typedef struct token_node_s { /* A token: fixed-size byte buffer, used length, and a results array */
 	guchar data[RSPAMD_MAX_TOKEN_LEN]; /* raw token bytes; the OSB tokenizer stores two 32-bit hashes here */
 	guint datalen; /* bytes of data in use; token_node_compare_func compares this before anything else */
 	GArray *results; /* NOTE(review): presumably elements are struct rspamd_token_result - confirm */
 } rspamd_token_t;

 /**
 * Initialise statistics modules
 * @param cfg
--- a/src/libstat/tokenizers.h
+++ b/src/libstat/tokenizers.h
@@ -5,24 +5,7 @@
 #include "mem_pool.h"
 #include "fstring.h"
 #include "main.h"

 /* Size for features pipe */
 #define FEATURE_WINDOW_SIZE 5
 #define MAX_DATA_LEN 64
 #define MAX_VALUES 32

 struct token_result { /* One value for a token, linked directly to a statfile config */
 	double value; /* NOTE(review): presumably the token's weight in the statfile - confirm */
 	struct rspamd_statfile_config *st; /* statfile configuration this result refers to */
 	double *consolidated_value; /* NOTE(review): points at a double owned elsewhere; purpose not visible here */
 };

 typedef struct token_node_s { /* NOTE(review): same tag as rspamd_token_t in stat_api.h; looks like the old definition being replaced - confirm */
 	guchar data[MAX_DATA_LEN]; /* raw token bytes, capacity MAX_DATA_LEN */
 	guint datalen; /* bytes of data in use */
 	struct token_result *results; /* pointer to the token's result entries */
 	guint results_len; /* NOTE(review): presumably the number of entries in results - confirm */
 } token_node_t;
 #include "stat_api.h"

 /* Common tokenizer structure */
 struct tokenizer {
--- a/src/libstat/tokenizers/osb.c
+++ b/src/libstat/tokenizers/osb.c
@@ -29,6 +29,9 @@
 #include <sys/types.h>
 #include "tokenizers.h"

 /* Size for features pipe */
 #define FEATURE_WINDOW_SIZE 5

 /* Minimum length of token */
 #define MIN_LEN 4

@@ -43,7 +46,7 @@ osb_tokenize_text (struct tokenizer *tokenizer,
 	gboolean is_utf,
 	GList *exceptions)
 {
 	token_node_t *new = NULL;
 	rspamd_token_t *new = NULL;
 	rspamd_fstring_t *token;
 	guint32 hashpipe[FEATURE_WINDOW_SIZE], h1, h2;
 	gint i, processed = 0;
@@ -82,7 +85,7 @@ osb_tokenize_text (struct tokenizer *tokenizer,
 				h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1];
 				h2 = hashpipe[0] * primes[1] + hashpipe[i] *
 					primes[(i << 1) - 1];
 				new = rspamd_mempool_alloc0 (pool, sizeof (token_node_t));
 				new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_token_t));
 				new->datalen = sizeof(gint32) * 2;
 				memcpy(new->data, &h1, sizeof(h1));
 				memcpy(new->data + sizeof(h1), &h2, sizeof(h2));
@@ -98,7 +101,7 @@ osb_tokenize_text (struct tokenizer *tokenizer,
 		for (i = 1; i < processed; i++) {
 			h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1];
 			h2 = hashpipe[0] * primes[1] + hashpipe[i] * primes[(i << 1) - 1];
 			new = rspamd_mempool_alloc0 (pool, sizeof (token_node_t));
 			new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_token_t));
 			new->datalen = sizeof(gint32) * 2;
 			memcpy(new->data, &h1, sizeof(h1));
 			memcpy(new->data + sizeof(h1), &h2, sizeof(h2));
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -92,7 +92,7 @@ rspamd_stat_get_tokenizer (const char *name)
 int
 token_node_compare_func (gconstpointer a, gconstpointer b)
 {
 	const token_node_t *aa = a, *bb = b;
 	const rspamd_token_t *aa = a, *bb = b;

 	if (aa->datalen != bb->datalen) {
 		return aa->datalen - bb->datalen;