From 27ad207da620685499aa04c8e9c7d4e4eafa11dc Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 5 Sep 2020 11:32:49 +0100 Subject: [PATCH] [Minor] Further fixes for MIXED_CHARSET rule --- src/plugins/chartable.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c index e1348b561..ce37ebd58 100644 --- a/src/plugins/chartable.c +++ b/src/plugins/chartable.c @@ -585,7 +585,7 @@ rspamd_chartable_process_word_ascii (struct rspamd_task *task, return badness; } -static void +static gboolean rspamd_chartable_process_part (struct rspamd_task *task, struct rspamd_mime_text_part *part, struct chartable_ctx *chartable_module_ctx, @@ -597,7 +597,7 @@ rspamd_chartable_process_part (struct rspamd_task *task, if (part == NULL || part->utf_words == NULL || part->utf_words->len == 0) { - return; + return FALSE; } for (i = 0; i < part->utf_words->len; i++) { @@ -625,15 +625,17 @@ rspamd_chartable_process_part (struct rspamd_task *task, cur_score /= (gdouble)part->nwords; - if (cur_score > 2.0) { - cur_score = 2.0; + if (cur_score > 1.0) { + cur_score = 1.0; } if (cur_score > chartable_module_ctx->threshold) { rspamd_task_insert_result (task, chartable_module_ctx->symbol, cur_score, NULL); - + return TRUE; } + + return FALSE; } static void @@ -645,8 +647,9 @@ chartable_symbol_callback (struct rspamd_task *task, struct rspamd_mime_text_part *part; struct chartable_ctx *chartable_module_ctx = chartable_get_context (task->cfg); const gchar *language = NULL; - gboolean ignore_diacritics = FALSE; + gboolean ignore_diacritics = FALSE, seen_violated_part = FALSE; + /* Check if we have parts with diacritic symbols language */ PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { if (part->languages && part->languages->len > 0) { struct rspamd_lang_detector_res *lang = @@ -660,8 +663,15 @@ chartable_symbol_callback (struct rspamd_task *task, } } - rspamd_chartable_process_part (task, part, chartable_module_ctx, - ignore_diacritics); + if (rspamd_chartable_process_part (task, part, chartable_module_ctx, + ignore_diacritics)) { + seen_violated_part = TRUE; + } + } + + if (MESSAGE_FIELD (task, text_parts)->len == 0) { + /* No text parts, assume that we should ignore diacritics checks for metatokens */ + ignore_diacritics = TRUE; } if (task->meta_words != NULL) { @@ -677,11 +687,18 @@ chartable_symbol_callback (struct rspamd_task *task, cur_score /= (gdouble)arlen; - if (cur_score > 2.0) { - cur_score = 2.0; + if (cur_score > 1.0) { + cur_score = 1.0; } if (cur_score > chartable_module_ctx->threshold) { + if (!seen_violated_part) { + /* Further penalise */ + if (cur_score > 0.25) { + cur_score = 0.25; + } + } + rspamd_task_insert_result (task, chartable_module_ctx->symbol, cur_score, "subject"); -- 2.39.5