From 6ee6b7a2642f6ef7d8805ce6a1299196fbe683e3 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 8 Jan 2016 15:57:15 +0000 Subject: [PATCH] Implement the concept when classifier values are integers --- src/libserver/cfg_file.h | 4 +++ src/libstat/backends/redis_backend.c | 38 +++++++++++++++++++++++----- src/libstat/classifiers/bayes.c | 1 + 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index 1dfd3419e..b03facd3a 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -127,6 +127,9 @@ struct rspamd_tokenizer_config { const gchar *name; /**< name of tokenizer */ }; + +/* Classifier has all integer values (e.g. bayes) */ +#define RSPAMD_FLAG_CLASSIFIER_INTEGER (1 << 0) /** * Classifier config definition */ @@ -143,6 +146,7 @@ struct rspamd_classifier_config { gchar *name; /**< unique name of classifier */ guint32 min_tokens; /**< minimal number of tokens to process classifier */ guint32 max_tokens; /**< maximum number of tokens */ + guint flags; }; struct rspamd_worker_bind_conf { diff --git a/src/libstat/backends/redis_backend.c b/src/libstat/backends/redis_backend.c index e81d1b9eb..a83ab3081 100644 --- a/src/libstat/backends/redis_backend.c +++ b/src/libstat/backends/redis_backend.c @@ -58,6 +58,7 @@ struct redis_stat_runtime { struct upstream *selected; struct event timeout_event; GArray *results; + struct rspamd_statfile_config *stcf; gchar *redis_object_expanded; redisAsyncContext *redis; guint64 learned; @@ -278,7 +279,8 @@ rspamd_redis_expand_object (const gchar *pattern, static rspamd_fstring_t * rspamd_redis_tokens_to_query (struct rspamd_task *task, GPtrArray *tokens, - const gchar *arg0, const gchar *arg1, gboolean learn, gint idx) + const gchar *arg0, const gchar *arg1, gboolean learn, gint idx, + gboolean intvals) { rspamd_fstring_t *out; rspamd_token_t *tok; @@ -319,7 +321,8 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, GPtrArray *tokens, larg1, 
arg1); l0 = rspamd_snprintf (n0, sizeof (n0), "%uL", num); - if (tok->values[idx] == (guint64)tok->values[idx]) { + + if (intvals) { l1 = rspamd_snprintf (n1, sizeof (n1), "%uL", (guint64)tok->values[idx]); } @@ -454,6 +457,7 @@ rspamd_redis_processed (redisAsyncContext *c, gpointer r, gpointer priv) rspamd_token_t *tok; guint i, processed = 0, found = 0; gulong val; + gdouble float_val; task = rt->task; @@ -465,15 +469,24 @@ rspamd_redis_processed (redisAsyncContext *c, gpointer r, gpointer priv) for (i = 0; i < reply->elements; i ++) { elt = reply->element[i]; - if (elt->type == REDIS_REPLY_INTEGER) { + if (G_LIKELY (elt->type == REDIS_REPLY_INTEGER)) { tok = g_ptr_array_index (task->tokens, i); tok->values[rt->id] = elt->integer; found ++; } else if (elt->type == REDIS_REPLY_STRING) { tok = g_ptr_array_index (task->tokens, i); - rspamd_strtoul (elt->str, elt->len, &val); - tok->values[rt->id] = val; + + if (rt->stcf->clcf->flags & + RSPAMD_FLAG_CLASSIFIER_INTEGER) { + rspamd_strtoul (elt->str, elt->len, &val); + tok->values[rt->id] = val; + } + else { + float_val = strtod (elt->str, NULL); + tok->values[rt->id] = float_val; + } + found ++; } else { @@ -643,6 +656,7 @@ rspamd_redis_runtime (struct rspamd_task *task, rt->selected = up; rt->task = task; rt->ctx = ctx; + rt->stcf = stcf; rt->conn_state = RSPAMD_REDIS_DISCONNECTED; addr = rspamd_upstream_addr (up); @@ -700,7 +714,8 @@ rspamd_redis_process_tokens (struct rspamd_task *task, rt->id = id; query = rspamd_redis_tokens_to_query (task, tokens, - "HMGET", rt->redis_object_expanded, FALSE, -1); + "HMGET", rt->redis_object_expanded, FALSE, -1, + rt->stcf->clcf->flags & RSPAMD_FLAG_CLASSIFIER_INTEGER); g_assert (query != NULL); ret = redisAsyncFormattedCommand (rt->redis, rspamd_redis_processed, rt, @@ -745,6 +760,7 @@ rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, rspamd_inet_addr_t *addr; struct timeval tv; rspamd_fstring_t *query; + const gchar *redis_cmd; gint ret; if (rt->conn_state 
!= RSPAMD_REDIS_DISCONNECTED) { @@ -779,9 +795,17 @@ rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, double_to_tv (rt->ctx->timeout, &tv); event_add (&rt->timeout_event, &tv); + if (rt->stcf->clcf->flags & RSPAMD_FLAG_CLASSIFIER_INTEGER) { + redis_cmd = "HINCRBY"; + } + else { + redis_cmd = "HINCRBYFLOAT"; + } + rt->id = id; query = rspamd_redis_tokens_to_query (task, tokens, - "HINCRBYFLOAT", rt->redis_object_expanded, TRUE, id); + redis_cmd, rt->redis_object_expanded, TRUE, id, + rt->stcf->clcf->flags & RSPAMD_FLAG_CLASSIFIER_INTEGER); g_assert (query != NULL); ret = redisAsyncFormattedCommand (rt->redis, rspamd_redis_learned, rt, diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index b08c70380..694898db5 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -197,6 +197,7 @@ bayes_normalize_prob (gdouble x) void bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier *cl) { + cl->cfg->flags |= RSPAMD_FLAG_CLASSIFIER_INTEGER; } gboolean -- 2.39.5