diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-09-05 11:32:49 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-09-05 11:32:49 +0100 |
commit | 27ad207da620685499aa04c8e9c7d4e4eafa11dc (patch) | |
tree | 9eb3c7802edeed17e3632d92953d557c81972f69 | |
parent | c3c811c76741dfe5c6feef293068eb90fc10bfdf (diff) | |
download | rspamd-27ad207da620685499aa04c8e9c7d4e4eafa11dc.tar.gz rspamd-27ad207da620685499aa04c8e9c7d4e4eafa11dc.zip |
[Minor] Further fixes for MIXED_CHARSET rule
-rw-r--r-- | src/plugins/chartable.c | 37 |
1 files changed, 27 insertions, 10 deletions
diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c index e1348b561..ce37ebd58 100644 --- a/src/plugins/chartable.c +++ b/src/plugins/chartable.c @@ -585,7 +585,7 @@ rspamd_chartable_process_word_ascii (struct rspamd_task *task, return badness; } -static void +static gboolean rspamd_chartable_process_part (struct rspamd_task *task, struct rspamd_mime_text_part *part, struct chartable_ctx *chartable_module_ctx, @@ -597,7 +597,7 @@ rspamd_chartable_process_part (struct rspamd_task *task, if (part == NULL || part->utf_words == NULL || part->utf_words->len == 0) { - return; + return FALSE; } for (i = 0; i < part->utf_words->len; i++) { @@ -625,15 +625,17 @@ rspamd_chartable_process_part (struct rspamd_task *task, cur_score /= (gdouble)part->nwords; - if (cur_score > 2.0) { - cur_score = 2.0; + if (cur_score > 1.0) { + cur_score = 1.0; } if (cur_score > chartable_module_ctx->threshold) { rspamd_task_insert_result (task, chartable_module_ctx->symbol, cur_score, NULL); - + return TRUE; } + + return FALSE; } static void @@ -645,8 +647,9 @@ chartable_symbol_callback (struct rspamd_task *task, struct rspamd_mime_text_part *part; struct chartable_ctx *chartable_module_ctx = chartable_get_context (task->cfg); const gchar *language = NULL; - gboolean ignore_diacritics = FALSE; + gboolean ignore_diacritics = FALSE, seen_violated_part = FALSE; + /* Check if we have parts with diacritic symbols language */ PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { if (part->languages && part->languages->len > 0) { struct rspamd_lang_detector_res *lang = @@ -660,8 +663,15 @@ chartable_symbol_callback (struct rspamd_task *task, } } - rspamd_chartable_process_part (task, part, chartable_module_ctx, - ignore_diacritics); + if (rspamd_chartable_process_part (task, part, chartable_module_ctx, + ignore_diacritics)) { + seen_violated_part = TRUE; + } + } + + if (MESSAGE_FIELD (task, text_parts)->len == 0) { + /* No text parts, assume that we should ignore diacritics checks for metatokens */ + ignore_diacritics = TRUE; } if (task->meta_words != NULL) { @@ -677,11 +687,18 @@ chartable_symbol_callback (struct rspamd_task *task, cur_score /= (gdouble)arlen; - if (cur_score > 2.0) { - cur_score = 2.0; + if (cur_score > 1.0) { + cur_score = 1.0; } if (cur_score > chartable_module_ctx->threshold) { + if (!seen_violated_part) { + /* Further penalise */ + if (cur_score > 0.25) { + cur_score = 0.25; + } + } + rspamd_task_insert_result (task, chartable_module_ctx->symbol, cur_score, "subject"); |