From c44fa2f60f96c9fa6f13a6f867aacdbe687ac6c7 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 3 Sep 2016 16:31:40 +0100 Subject: [Feature] Add more meta-tokens to bayes --- src/libstat/stat_process.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'src/libstat') diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 00e7ded4c..0d3795f4d 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -22,7 +22,8 @@ #include "libmime/images.h" #include "libserver/html.h" #include "lua/lua_common.h" -#include <utlist.h> +#include "utlist.h" +#include <math.h> #define RSPAMD_CLASSIFY_OP 0 #define RSPAMD_LEARN_OP 1 @@ -74,6 +75,7 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx, GArray *ar; rspamd_ftok_t elt; guint i; + gchar tmpbuf[128]; ar = g_array_sized_new (FALSE, FALSE, sizeof (elt), 4); @@ -123,6 +125,14 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx, msg_debug_task ("added stat tokens for mime boundary '%s'", elt.begin); g_array_append_val (ar, elt); } + + if (part->content && part->content->len > 1) { + rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "mime%d:%dlog", + (gint)log2 (part->content->len)); + elt.begin = rspamd_mempool_strdup (task->task_pool, tmpbuf); + elt.len = strlen (elt.begin); + g_array_append_val (ar, elt); + } } } @@ -152,6 +162,23 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx, cur = g_list_next (cur); } + /* Size meta-token */ + if (task->msg.len > 1) { + rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "size%dlog", + (gint)log2 (task->msg.len)); + elt.begin = rspamd_mempool_strdup (task->task_pool, tmpbuf); + elt.len = strlen (elt.begin); + g_array_append_val (ar, elt); + } + /* Number recipients */ + if (task->rcpt_envelope && task->rcpt_envelope->len > 0) { + rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "recipients%d", + task->rcpt_envelope->len); + elt.begin = rspamd_mempool_strdup (task->task_pool, tmpbuf); +
elt.len = strlen (elt.begin); + g_array_append_val (ar, elt); + } + st_ctx->tokenizer->tokenize_func (st_ctx, task->task_pool, ar, -- cgit v1.2.3