diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-03-23 17:14:07 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-03-23 17:14:07 +0000 |
commit | 11021601e04634213a31df0c16aa4d1c064201ac (patch) | |
tree | 12f5fac32c4b5747ea45c967fe9f0d0de19f1a45 | |
parent | 8e865ebfcb0fe584841c2e082acd894646569213 (diff) | |
download | rspamd-11021601e04634213a31df0c16aa4d1c064201ac.tar.gz rspamd-11021601e04634213a31df0c16aa4d1c064201ac.zip |
[Feature] Process subject for mixed characters
-rw-r--r-- | src/libstat/stat_process.c | 1 | ||||
-rw-r--r-- | src/libutil/util.h | 2 | ||||
-rw-r--r-- | src/plugins/chartable.c | 35 |
3 files changed, 36 insertions, 2 deletions
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 8d9717562..e5c17ddff 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -18,7 +18,6 @@ #include "rspamd.h" #include "stat_internal.h" #include "libmime/message.h" -#include "libmime/filter.h" #include "libmime/images.h" #include "libserver/html.h" #include "lua/lua_common.h" diff --git a/src/libutil/util.h b/src/libutil/util.h index cfea5f851..48381ed92 100644 --- a/src/libutil/util.h +++ b/src/libutil/util.h @@ -516,5 +516,5 @@ gdouble rspamd_normalize_probability (gdouble x, gdouble bias); */ guint64 rspamd_tm_to_time (const struct tm *tm, glong tz); -#define PTR_ARRAY_FOREACH(ar, i, cur) if ((ar) != NULL && (ar)->len > 0) for ((i) = 0; (i) < (ar)->len && (((cur) = g_ptr_array_index((ar), (i))) || 1); ++(i)) +#define PTR_ARRAY_FOREACH(ar, i, cur) for ((i) = 0; (ar) != NULL && (i) < (ar)->len && (((cur) = g_ptr_array_index((ar), (i))) || 1); ++(i)) #endif diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c index d6efc3ed2..ff3aa480f 100644 --- a/src/plugins/chartable.c +++ b/src/plugins/chartable.c @@ -26,6 +26,8 @@ #include "libmime/message.h" #include "rspamd.h" #include "libstat/stat_api.h" +#include "libstat/tokenizers/tokenizers.h" + #include "unicode/utf8.h" #include "unicode/uchar.h" @@ -399,6 +401,39 @@ chartable_symbol_callback (struct rspamd_task *task, void *unused) part = g_ptr_array_index (task->text_parts, i); rspamd_chartable_process_part (task, part); } + + if (task->subject != NULL) { + GArray *words; + rspamd_stat_token_t *w; + guint i; + gdouble cur_score = 0.0; + + words = rspamd_tokenize_text (task->subject, strlen (task->subject), + TRUE, + NULL, + NULL, + FALSE, + NULL); + + if (words) { + for (i = 0; i < words->len; i++) { + w = &g_array_index (words, rspamd_stat_token_t, i); + cur_score += rspamd_chartable_process_word_utf (task, w, FALSE); + } + } + + cur_score /= (gdouble)part->normalized_words->len; + + if (cur_score > 2.0) { + cur_score = 2.0; + } + + if (cur_score > chartable_module_ctx->threshold) { + rspamd_task_insert_result (task, chartable_module_ctx->symbol, + cur_score, "subject"); + + } + } } static void |