source.dussan.org Git - rspamd.git/commitdiff
[Minor] Improve subject rules 608/head
author Alexander Moisseev <moiseev@mezonplus.ru>
Thu, 28 Apr 2016 07:46:15 +0000 (10:46 +0300)
committer Alexander Moisseev <moiseev@mezonplus.ru>
Thu, 28 Apr 2016 07:46:15 +0000 (10:46 +0300)
Make scores depend on subject length
Also resolves #548

conf/metrics.conf
rules/misc.lua

index 634a6f69885b3e4c8b230e533be2a10897a6730c..c79299df16668872bef04b250fb03f8b66705b0d 100644 (file)
@@ -43,10 +43,6 @@ metric {
             weight = 1.500000;
             description = "Recipients seems to be autogenerated (works if recipients count is more than 5)";
         }
-        symbol "FAKE_REPLY_C" {
-            weight = 6.0;
-            description = "Fake reply (has RE in subject, but has not References header)";
-        }
         symbol "MIME_HTML_ONLY" {
             weight = 1.0;
             description = "Messages that have only HTML part";
@@ -285,6 +281,23 @@ metric {
         }
     }
 
+    group "subject" {
+        max_score = 6.0;
+
+        symbol "FAKE_REPLY_C" {
+            weight = 6.0;
+            description = "Fake reply (has RE in subject, but has not References header)";
+        }
+        symbol "LONG_SUBJ" {
+            weight = 6.0;
+            description = "Subject is too long";
+        }
+        symbol "SUBJ_ALL_CAPS" {
+            weight = 3.0;
+            description = "No lower case letters in subject";
+        }
+    }
+
     group "mua" {
         symbol "FORGED_MUA_THEBAT_MSGID" {
             weight = 4.0;
index 7bfe735515207c09464c198ee08937e9eb577fd1..d071369c89644e8ed14f47f65dd08504a39345bb 100644 (file)
@@ -31,6 +31,36 @@ local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iP'
 reconf['R_WHITE_ON_WHITE'] = string.format('(!(%s) & (%s))', r_bgcolor, r_font_color)
 reconf['R_FLASH_REDIR_IMGSHACK'] = '/^(?:http:\\/\\/)?img\\d{1,5}\\.imageshack\\.us\\/\\S+\\.swf/U'
 
+-- Local functions
+local function insert_linear(task, a, x, symbol)
+    local f = a * x
+    task:insert_result(symbol, ( f < 1 ) and f or 1, tostring(x))
+end
+
+-- Subject issues
+local function subject(task)
+    local sbj = task:get_header('Subject')
+
+    if sbj then
+      local stripped_subject = subject_re:search(sbj, false, true)
+      if stripped_subject and stripped_subject[1] and stripped_subject[1][2] then
+        sbj = stripped_subject[1][2]
+      end
+
+      local l = util.strlen_utf8(sbj)
+      if l > 200 then
+        insert_linear(task, 1/400, l, 'LONG_SUBJ')
+      end
+      if util.is_uppercase(sbj) then
+        insert_linear(task, 1/40, l, 'SUBJ_ALL_CAPS')
+      end
+    end
+
+    return false
+end
+
+rspamd_config:register_symbols(subject, 1.0, 'SUBJ', 'LONG_SUBJ', 'SUBJ_ALL_CAPS');
+
 -- Different text parts
 rspamd_config.R_PARTS_DIFFER = function(task)
   local distance = task:get_mempool():get_variable('parts_distance', 'double')
@@ -106,42 +136,6 @@ rspamd_config.R_SUSPICIOUS_URL = function(task)
     return false
 end
 
-rspamd_config.SUBJ_ALL_CAPS = {
-  callback = function(task)
-    local sbj = task:get_header('Subject')
-
-    if sbj then
-      local stripped_subject = subject_re:search(sbj, false, true)
-      if stripped_subject and stripped_subject[1] and stripped_subject[1][2] then
-        sbj = stripped_subject[1][2]
-      end
-
-      if util.is_uppercase(sbj) then
-        return true
-      end
-    end
-
-    return false
-  end,
-  score = 3.0,
-  group = 'header',
-  description = 'All capital letters in subject'
-}
-
-rspamd_config.LONG_SUBJ = {
-  callback = function(task)
-    local sbj = task:get_header('Subject')
-    if sbj and util.strlen_utf8(sbj) > 200 then
-      return true
-    end
-    return false
-  end,
-
-  score = 3.0,
-  group = 'header',
-  description = 'Subject is too long'
-}
-
 rspamd_config.BROKEN_HEADERS = {
   callback = function(task)
     if task:has_flag('broken_headers') then