summary | refs | log | tree | commit | diff | stats
path: root/src/libstat
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-09-03 16:31:40 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-09-03 16:33:46 +0100
commit: c44fa2f60f96c9fa6f13a6f867aacdbe687ac6c7 (patch)
tree: 432a9e16caeddc0e9be88da4da940d4ecb3c64db /src/libstat
parent: 8a60ead3de181979741fa97eb875fc80a4a7f0ed (diff)
download: rspamd-c44fa2f60f96c9fa6f13a6f867aacdbe687ac6c7.tar.gz
rspamd-c44fa2f60f96c9fa6f13a6f867aacdbe687ac6c7.zip
[Feature] Add more meta-tokens to bayes
Diffstat (limited to 'src/libstat')
-rw-r--r-- src/libstat/stat_process.c 29
1 files changed, 28 insertions, 1 deletions
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 00e7ded4c..0d3795f4d 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -22,7 +22,8 @@
#include "libmime/images.h"
#include "libserver/html.h"
#include "lua/lua_common.h"
-#include <utlist.h>
+#include "utlist.h"
+#include <math.h>
#define RSPAMD_CLASSIFY_OP 0
#define RSPAMD_LEARN_OP 1
@@ -74,6 +75,7 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx,
GArray *ar;
rspamd_ftok_t elt;
guint i;
+ gchar tmpbuf[128];
ar = g_array_sized_new (FALSE, FALSE, sizeof (elt), 4);
@@ -123,6 +125,14 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx,
msg_debug_task ("added stat tokens for mime boundary '%s'", elt.begin);
g_array_append_val (ar, elt);
}
+
+ if (part->content && part->content->len > 1) { /* token: mime<i>:<(gint)log2(len)>log */
+ rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "mime%d:%dlog",
+ i, (gint)log2 (part->content->len));
+ elt.begin = rspamd_mempool_strdup (task->task_pool, tmpbuf);
+ elt.len = strlen (elt.begin);
+ g_array_append_val (ar, elt);
+ }
}
}
@@ -152,6 +162,23 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx,
cur = g_list_next (cur);
}
+ /* Size meta-token */
+ if (task->msg.len > 1) {
+ rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "size%dlog",
+ (gint)log2 (task->msg.len));
+ elt.begin = rspamd_mempool_strdup (task->task_pool, tmpbuf);
+ elt.len = strlen (elt.begin);
+ g_array_append_val (ar, elt);
+ }
+ /* Number recipients */
+ if (task->rcpt_envelope && task->rcpt_envelope->len > 0) {
+ rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "recipients%d",
+ task->rcpt_envelope->len);
+ elt.begin = rspamd_mempool_strdup (task->task_pool, tmpbuf);
+ elt.len = strlen (elt.begin);
+ g_array_append_val (ar, elt);
+ }
+
st_ctx->tokenizer->tokenize_func (st_ctx,
task->task_pool,
ar,