diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-09-03 16:31:40 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-09-03 16:33:46 +0100 |
commit | c44fa2f60f96c9fa6f13a6f867aacdbe687ac6c7 (patch) | |
tree | 432a9e16caeddc0e9be88da4da940d4ecb3c64db | |
parent | 8a60ead3de181979741fa97eb875fc80a4a7f0ed (diff) | |
download | rspamd-c44fa2f60f96c9fa6f13a6f867aacdbe687ac6c7.tar.gz rspamd-c44fa2f60f96c9fa6f13a6f867aacdbe687ac6c7.zip |
[Feature] Add more meta-tokens to bayes
-rw-r--r-- | src/libstat/stat_process.c | 29 |
1 file changed, 28 insertions, 1 deletion
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 00e7ded4c..0d3795f4d 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -22,7 +22,8 @@ #include "libmime/images.h" #include "libserver/html.h" #include "lua/lua_common.h" -#include <utlist.h> +#include "utlist.h" +#include <math.h> #define RSPAMD_CLASSIFY_OP 0 #define RSPAMD_LEARN_OP 1 @@ -74,6 +75,7 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx, GArray *ar; rspamd_ftok_t elt; guint i; + gchar tmpbuf[128]; ar = g_array_sized_new (FALSE, FALSE, sizeof (elt), 4); @@ -123,6 +125,14 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx, msg_debug_task ("added stat tokens for mime boundary '%s'", elt.begin); g_array_append_val (ar, elt); } + + if (part->content && part->content->len > 1) { + rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "mime%d:%dlog", + (gint)log2 (part->content->len)); + elt.begin = rspamd_mempool_strdup (task->task_pool, tmpbuf); + elt.len = strlen (elt.begin); + g_array_append_val (ar, elt); + } } } @@ -152,6 +162,23 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx, cur = g_list_next (cur); } + /* Size meta-token */ + if (task->msg.len > 1) { + rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "size%dlog", + (gint)log2 (task->msg.len)); + elt.begin = rspamd_mempool_strdup (task->task_pool, tmpbuf); + elt.len = strlen (elt.begin); + g_array_append_val (ar, elt); + } + /* Number recipients */ + if (task->rcpt_envelope && task->rcpt_envelope->len > 0) { + rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "recipients%d", + task->rcpt_envelope->len); + elt.begin = rspamd_mempool_strdup (task->task_pool, tmpbuf); + elt.len = strlen (elt.begin); + g_array_append_val (ar, elt); + } + st_ctx->tokenizer->tokenize_func (st_ctx, task->task_pool, ar, |