source.dussan.org Git - rspamd.git/commitdiff
[Feature] Process subject for mixed characters
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 23 Mar 2017 17:14:07 +0000 (17:14 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 23 Mar 2017 17:14:07 +0000 (17:14 +0000)
src/libstat/stat_process.c
src/libutil/util.h
src/plugins/chartable.c

index 8d9717562285d3f9cec1cb3d6376f3da2be3a53e..e5c17ddff546edb7a4f5126caa8c60e838e83e18 100644 (file)
@@ -18,7 +18,6 @@
 #include "rspamd.h"
 #include "stat_internal.h"
 #include "libmime/message.h"
-#include "libmime/filter.h"
 #include "libmime/images.h"
 #include "libserver/html.h"
 #include "lua/lua_common.h"
index cfea5f85178e3db4c6a28d7049422130cb722411..48381ed92db88005adffcc1085ea9d3202b1eb32 100644 (file)
@@ -516,5 +516,5 @@ gdouble rspamd_normalize_probability (gdouble x, gdouble bias);
  */
 guint64 rspamd_tm_to_time (const struct tm *tm, glong tz);
 
-#define PTR_ARRAY_FOREACH(ar, i, cur) if ((ar) != NULL && (ar)->len > 0) for ((i) = 0; (i) < (ar)->len && (((cur) = g_ptr_array_index((ar), (i))) || 1); ++(i))
+#define PTR_ARRAY_FOREACH(ar, i, cur) for ((i) = 0; (ar) != NULL && (i) < (ar)->len && (((cur) = g_ptr_array_index((ar), (i))) || 1); ++(i))
 #endif
index d6efc3ed265e7bdd1bc49d3492d344a69a441d04..ff3aa480f0bce8d80cfd2ce19a538723f2572f2c 100644 (file)
@@ -26,6 +26,8 @@
 #include "libmime/message.h"
 #include "rspamd.h"
 #include "libstat/stat_api.h"
+#include "libstat/tokenizers/tokenizers.h"
+
 #include "unicode/utf8.h"
 #include "unicode/uchar.h"
 
@@ -399,6 +401,39 @@ chartable_symbol_callback (struct rspamd_task *task, void *unused)
                part = g_ptr_array_index (task->text_parts, i);
                rspamd_chartable_process_part (task, part);
        }
+
+       if (task->subject != NULL) {
+               GArray *words;
+               rspamd_stat_token_t *w;
+               guint i;
+               gdouble cur_score = 0.0;
+
+               words = rspamd_tokenize_text (task->subject, strlen (task->subject),
+                               TRUE,
+                               NULL,
+                               NULL,
+                               FALSE,
+                               NULL);
+
+               if (words) {
+                       for (i = 0; i < words->len; i++) {
+                               w = &g_array_index (words, rspamd_stat_token_t, i);
+                               cur_score += rspamd_chartable_process_word_utf (task, w, FALSE);
+                       }
+               }
+
+               cur_score /= (gdouble)part->normalized_words->len;
+
+               if (cur_score > 2.0) {
+                       cur_score = 2.0;
+               }
+
+               if (cur_score > chartable_module_ctx->threshold) {
+                       rspamd_task_insert_result (task, chartable_module_ctx->symbol,
+                                       cur_score, "subject");
+
+               }
+       }
 }
 
 static void