summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-03-23 17:14:07 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-03-23 17:14:07 +0000
commit 11021601e04634213a31df0c16aa4d1c064201ac (patch)
tree 12f5fac32c4b5747ea45c967fe9f0d0de19f1a45
parent 8e865ebfcb0fe584841c2e082acd894646569213 (diff)
download rspamd-11021601e04634213a31df0c16aa4d1c064201ac.tar.gz
rspamd-11021601e04634213a31df0c16aa4d1c064201ac.zip
[Feature] Process subject for mixed characters
-rw-r--r-- src/libstat/stat_process.c 1
-rw-r--r-- src/libutil/util.h 2
-rw-r--r-- src/plugins/chartable.c 35
3 files changed, 36 insertions, 2 deletions
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 8d9717562..e5c17ddff 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -18,7 +18,6 @@
#include "rspamd.h"
#include "stat_internal.h"
#include "libmime/message.h"
-#include "libmime/filter.h"
#include "libmime/images.h"
#include "libserver/html.h"
#include "lua/lua_common.h"
diff --git a/src/libutil/util.h b/src/libutil/util.h
index cfea5f851..48381ed92 100644
--- a/src/libutil/util.h
+++ b/src/libutil/util.h
@@ -516,5 +516,5 @@ gdouble rspamd_normalize_probability (gdouble x, gdouble bias);
*/
guint64 rspamd_tm_to_time (const struct tm *tm, glong tz);
-#define PTR_ARRAY_FOREACH(ar, i, cur) if ((ar) != NULL && (ar)->len > 0) for ((i) = 0; (i) < (ar)->len && (((cur) = g_ptr_array_index((ar), (i))) || 1); ++(i))
+#define PTR_ARRAY_FOREACH(ar, i, cur) for ((i) = 0; (ar) != NULL && (i) < (ar)->len && (((cur) = g_ptr_array_index((ar), (i))) || 1); ++(i))
#endif
diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c
index d6efc3ed2..ff3aa480f 100644
--- a/src/plugins/chartable.c
+++ b/src/plugins/chartable.c
@@ -26,6 +26,8 @@
#include "libmime/message.h"
#include "rspamd.h"
#include "libstat/stat_api.h"
+#include "libstat/tokenizers/tokenizers.h"
+
#include "unicode/utf8.h"
#include "unicode/uchar.h"
@@ -399,6 +401,39 @@ chartable_symbol_callback (struct rspamd_task *task, void *unused)
part = g_ptr_array_index (task->text_parts, i);
rspamd_chartable_process_part (task, part);
}
+
+ if (task->subject != NULL) {
+ GArray *words;
+ rspamd_stat_token_t *w;
+ guint i;
+ gdouble cur_score = 0.0;
+
+ words = rspamd_tokenize_text (task->subject, strlen (task->subject),
+ TRUE,
+ NULL,
+ NULL,
+ FALSE,
+ NULL);
+
+ if (words) {
+ for (i = 0; i < words->len; i++) {
+ w = &g_array_index (words, rspamd_stat_token_t, i);
+ cur_score += rspamd_chartable_process_word_utf (task, w, FALSE);
+ }
+ }
+
+ cur_score /= (gdouble)part->normalized_words->len;
+
+ if (cur_score > 2.0) {
+ cur_score = 2.0;
+ }
+
+ if (cur_score > chartable_module_ctx->threshold) {
+ rspamd_task_insert_result (task, chartable_module_ctx->symbol,
+ cur_score, "subject");
+
+ }
+ }
}
static void