]> source.dussan.org Git - rspamd.git/commitdiff
[Minor] Further fixes for MIXED_CHARSET rule
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 5 Sep 2020 10:32:49 +0000 (11:32 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 5 Sep 2020 10:32:49 +0000 (11:32 +0100)
src/plugins/chartable.c

index e1348b561bf21eed28540aff1f66dfd553cd943b..ce37ebd58a3afc5f58372042a97582dd88e86250 100644 (file)
@@ -585,7 +585,7 @@ rspamd_chartable_process_word_ascii (struct rspamd_task *task,
        return badness;
 }
 
-static void
+static gboolean
 rspamd_chartable_process_part (struct rspamd_task *task,
                                                           struct rspamd_mime_text_part *part,
                                                           struct chartable_ctx *chartable_module_ctx,
@@ -597,7 +597,7 @@ rspamd_chartable_process_part (struct rspamd_task *task,
 
        if (part == NULL || part->utf_words == NULL ||
                        part->utf_words->len == 0) {
-               return;
+               return FALSE;
        }
 
        for (i = 0; i < part->utf_words->len; i++) {
@@ -625,15 +625,17 @@ rspamd_chartable_process_part (struct rspamd_task *task,
 
        cur_score /= (gdouble)part->nwords;
 
-       if (cur_score > 2.0) {
-               cur_score = 2.0;
+       if (cur_score > 1.0) {
+               cur_score = 1.0;
        }
 
        if (cur_score > chartable_module_ctx->threshold) {
                rspamd_task_insert_result (task, chartable_module_ctx->symbol,
                                cur_score, NULL);
-
+               return TRUE;
        }
+
+       return FALSE;
 }
 
 static void
@@ -645,8 +647,9 @@ chartable_symbol_callback (struct rspamd_task *task,
        struct rspamd_mime_text_part *part;
        struct chartable_ctx *chartable_module_ctx = chartable_get_context (task->cfg);
        const gchar *language = NULL;
-       gboolean ignore_diacritics = FALSE;
+       gboolean ignore_diacritics = FALSE, seen_violated_part = FALSE;
 
+       /* Check if we have parts with diacritic symbols language */
        PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) {
                if (part->languages && part->languages->len > 0) {
                        struct rspamd_lang_detector_res *lang =
@@ -660,8 +663,15 @@ chartable_symbol_callback (struct rspamd_task *task,
                        }
                }
 
-               rspamd_chartable_process_part (task, part, chartable_module_ctx,
-                               ignore_diacritics);
+               if (rspamd_chartable_process_part (task, part, chartable_module_ctx,
+                               ignore_diacritics)) {
+                       seen_violated_part = TRUE;
+               }
+       }
+
+       if (MESSAGE_FIELD (task, text_parts)->len == 0) {
+               /* No text parts, assume that we should ignore diacritics checks for metatokens */
+               ignore_diacritics = TRUE;
        }
 
        if (task->meta_words != NULL) {
@@ -677,11 +687,18 @@ chartable_symbol_callback (struct rspamd_task *task,
 
                cur_score /= (gdouble)arlen;
 
-               if (cur_score > 2.0) {
-                       cur_score = 2.0;
+               if (cur_score > 1.0) {
+                       cur_score = 1.0;
                }
 
                if (cur_score > chartable_module_ctx->threshold) {
+                       if (!seen_violated_part) {
+                               /* Further penalise */
+                               if (cur_score > 0.25) {
+                                       cur_score = 0.25;
+                               }
+                       }
+
                        rspamd_task_insert_result (task, chartable_module_ctx->symbol,
                                        cur_score, "subject");