From 70c01c990ab4f82a1c7ff82f4c3f2df804de20a6 Mon Sep 17 00:00:00 2001
From: Alexander Moisseev
Date: Thu, 28 Apr 2016 10:46:15 +0300
Subject: [PATCH] [Minor] Improve subject rules

Make scores depend on subject length
Also resolves #548
---
Reviewer notes (free text placed between the three-dash line and the first
"diff --git" line; it is commentary only and not part of the change):

* conf/metrics.conf: FAKE_REPLY_C is removed from its previous position and
  re-added, with the same weight (6.0) and description, inside a new
  "subject" group that sets max_score = 6.0. LONG_SUBJ (weight 6.0) and
  SUBJ_ALL_CAPS (weight 3.0) are declared in the same group.
* rules/misc.lua: the table-style rules rspamd_config.SUBJ_ALL_CAPS and
  rspamd_config.LONG_SUBJ (each with score = 3.0 and group = 'header') are
  deleted. They are replaced by a single callback, subject(task), which is
  passed to rspamd_config:register_symbols together with 1.0 and the names
  'SUBJ', 'LONG_SUBJ' and 'SUBJ_ALL_CAPS'.
* insert_linear(task, a, x, symbol) calls
  task:insert_result(symbol, w, tostring(x)), where w is a * x, or 1 when
  a * x is not below 1.
* subject(task) reads the Subject header; when subject_re:search returns a
  capture at [1][2], that capture replaces the header value. The length is
  then taken with util.strlen_utf8. LONG_SUBJ is inserted with a = 1/400
  when the length is above 200; SUBJ_ALL_CAPS is inserted with a = 1/40
  when util.is_uppercase(sbj) is truthy. The callback always returns false.
* Behaviour differences against the deleted rules: the old LONG_SUBJ
  measured the Subject header as returned by task:get_header, the new code
  measures it after the subject_re substitution; the old rules returned
  true/false, the new code passes a length-dependent value to
  task:insert_result.
* NOTE(review): subject_re and util are not defined in the lines shown in
  this patch; presumably they are declared earlier in rules/misc.lua -
  confirm.
* NOTE(review): line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch; confirm the whitespace of the
  context and removed lines against the target tree before applying.

 conf/metrics.conf | 21 ++++++++++++---
 rules/misc.lua    | 66 +++++++++++++++++++++-------------------------
 2 files changed, 47 insertions(+), 40 deletions(-)

diff --git a/conf/metrics.conf b/conf/metrics.conf
index 634a6f698..c79299df1 100644
--- a/conf/metrics.conf
+++ b/conf/metrics.conf
@@ -43,10 +43,6 @@ metric {
             weight = 1.500000;
             description = "Recipients seems to be autogenerated (works if recipients count is more than 5)";
         }
-        symbol "FAKE_REPLY_C" {
-            weight = 6.0;
-            description = "Fake reply (has RE in subject, but has not References header)";
-        }
         symbol "MIME_HTML_ONLY" {
             weight = 1.0;
             description = "Messages that have only HTML part";
@@ -285,6 +281,23 @@ metric {
         }
     }
 
+    group "subject" {
+        max_score = 6.0;
+
+        symbol "FAKE_REPLY_C" {
+            weight = 6.0;
+            description = "Fake reply (has RE in subject, but has not References header)";
+        }
+        symbol "LONG_SUBJ" {
+            weight = 6.0;
+            description = "Subject is too long";
+        }
+        symbol "SUBJ_ALL_CAPS" {
+            weight = 3.0;
+            description = "No lower case letters in subject";
+        }
+    }
+
     group "mua" {
         symbol "FORGED_MUA_THEBAT_MSGID" {
             weight = 4.0;
diff --git a/rules/misc.lua b/rules/misc.lua
index 7bfe73551..d071369c8 100644
--- a/rules/misc.lua
+++ b/rules/misc.lua
@@ -31,6 +31,36 @@ local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iP'
 reconf['R_WHITE_ON_WHITE'] = string.format('(!(%s) & (%s))', r_bgcolor, r_font_color)
 reconf['R_FLASH_REDIR_IMGSHACK'] = '/^(?:http:\\/\\/)?img\\d{1,5}\\.imageshack\\.us\\/\\S+\\.swf/U'
 
+-- Local functions
+local function insert_linear(task, a, x, symbol)
+  local f = a * x
+  task:insert_result(symbol, ( f < 1 ) and f or 1, tostring(x))
+end
+
+-- Subject issues
+local function subject(task)
+  local sbj = task:get_header('Subject')
+
+  if sbj then
+    local stripped_subject = subject_re:search(sbj, false, true)
+    if stripped_subject and stripped_subject[1] and stripped_subject[1][2] then
+      sbj = stripped_subject[1][2]
+    end
+
+    local l = util.strlen_utf8(sbj)
+    if l > 200 then
+      insert_linear(task, 1/400, l, 'LONG_SUBJ')
+    end
+    if util.is_uppercase(sbj) then
+      insert_linear(task, 1/40, l, 'SUBJ_ALL_CAPS')
+    end
+  end
+
+  return false
+end
+
+rspamd_config:register_symbols(subject, 1.0, 'SUBJ', 'LONG_SUBJ', 'SUBJ_ALL_CAPS');
+
 -- Different text parts
 rspamd_config.R_PARTS_DIFFER = function(task)
   local distance = task:get_mempool():get_variable('parts_distance', 'double')
@@ -106,42 +136,6 @@ rspamd_config.R_SUSPICIOUS_URL = function(task)
   return false
 end
 
-rspamd_config.SUBJ_ALL_CAPS = {
-  callback = function(task)
-    local sbj = task:get_header('Subject')
-
-    if sbj then
-      local stripped_subject = subject_re:search(sbj, false, true)
-      if stripped_subject and stripped_subject[1] and stripped_subject[1][2] then
-        sbj = stripped_subject[1][2]
-      end
-
-      if util.is_uppercase(sbj) then
-        return true
-      end
-    end
-
-    return false
-  end,
-  score = 3.0,
-  group = 'header',
-  description = 'All capital letters in subject'
-}
-
-rspamd_config.LONG_SUBJ = {
-  callback = function(task)
-    local sbj = task:get_header('Subject')
-    if sbj and util.strlen_utf8(sbj) > 200 then
-      return true
-    end
-    return false
-  end,
-
-  score = 3.0,
-  group = 'header',
-  description = 'Subject is too long'
-}
-
 rspamd_config.BROKEN_HEADERS = {
   callback = function(task)
     if task:has_flag('broken_headers') then
-- 
2.39.5