aboutsummaryrefslogtreecommitdiffstats
path: root/conf
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-07-23 16:11:49 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-07-23 16:11:49 +0100
commit: 34bed7350efbdd0a3b135b4b7dbf508bdbae9c1a (patch)
tree: f081d655fae27a88a97914e236f2982057ee9d59 /conf
parent: a3cfc0f8bde8975daf7f448ca19e31cf245fe7c1 (diff)
download: rspamd-34bed7350efbdd0a3b135b4b7dbf508bdbae9c1a.tar.gz
rspamd-34bed7350efbdd0a3b135b4b7dbf508bdbae9c1a.zip
Start work on new HTML rules.
Diffstat (limited to 'conf')
-rw-r--r-- conf/lua/html.lua | 53
-rw-r--r-- conf/lua/regexp/headers.lua | 4
-rw-r--r-- conf/lua/rspamd.lua | 5
-rw-r--r-- conf/metrics.conf | 10
4 files changed, 64 insertions, 8 deletions
diff --git a/conf/lua/html.lua b/conf/lua/html.lua
new file mode 100644
index 000000000..bd7abd987
--- /dev/null
+++ b/conf/lua/html.lua
@@ -0,0 +1,53 @@
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to you under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at:
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+local reconf = config['regexp']
+local rspamd_regexp = require "rspamd_regexp"
+
+-- Messages that have only an HTML part: registered in the `regexp` config
+-- table as the expression string `has_only_html_part()`
+reconf['MIME_HTML_ONLY'] = 'has_only_html_part()'
+
+--- Check whether an HTML part of bounded size contains an embedded image.
+-- Walks the text parts of the task, looks only at parts reporting
+-- `is_html()`, and inspects the images of those whose raw length lies in
+-- the half-open range [min, max).
+-- @param task message task object; must provide `get_text_parts()`
+-- @param min  inclusive lower bound for the part's raw length
+-- @param max  exclusive upper bound for the part's raw length
+-- @return true as soon as an image with a truthy `embedded` field is found
+--         in a matching part; nil (no value) otherwise
+local function check_html_image(task, min, max)
+  -- Fall back to an empty list so that ipairs() does not raise when the
+  -- task yields no text parts table
+  local tp = task:get_text_parts() or {}
+
+  for _, p in ipairs(tp) do
+    if p:is_html() then
+      local hc = p:get_html()
+      local len = p:get_raw_length()
+
+      -- Skip parts for which no HTML content object is available instead
+      -- of failing on a nil index below
+      if hc and len >= min and len < max then
+        local images = hc:get_images()
+
+        if images then
+          for _, i in ipairs(images) do
+            if i['embedded'] then
+              return true
+            end
+          end
+        end
+      end
+    end
+  end
+end
+
+-- Register one symbol callback per HTML size bucket. Each callback passes
+-- its bounds to check_html_image, which tests the raw length of an HTML
+-- part against the half-open range [lo, hi) and looks for embedded images.
+local short_img_rules = {
+  { 'HTML_SHORT_LINK_IMG_1', 0, 1024 },
+  { 'HTML_SHORT_LINK_IMG_2', 1024, 1536 },
+  { 'HTML_SHORT_LINK_IMG_3', 1536, 2048 },
+}
+for _, rule in ipairs(short_img_rules) do
+  local sym, lo, hi = rule[1], rule[2], rule[3]
+  rspamd_config[sym] = function(task)
+    return check_html_image(task, lo, hi)
+  end
+end \ No newline at end of file
diff --git a/conf/lua/regexp/headers.lua b/conf/lua/regexp/headers.lua
index e6f079e86..e8bc7af44 100644
--- a/conf/lua/regexp/headers.lua
+++ b/conf/lua/regexp/headers.lua
@@ -70,10 +70,6 @@ reconf['R_MISSING_CHARSET']= string.format('content_type_is_type(text) & !conten
-- Subject seems to be spam
reconf['R_SAJDING'] = 'Subject=/\\bsajding(?:om|a)?\\b/iH'
--- Messages that have only HTML part
-reconf['MIME_HTML_ONLY'] = 'has_only_html_part()'
-
-
-- Find forged Outlook MUA
-- Yahoo groups messages
local yahoo_bulk = 'Received=/from \\[\\S+\\] by \\S+\\.(?:groups|scd|dcn)\\.yahoo\\.com with NNFMP/H'
diff --git a/conf/lua/rspamd.lua b/conf/lua/rspamd.lua
index df480a72c..bb1709c69 100644
--- a/conf/lua/rspamd.lua
+++ b/conf/lua/rspamd.lua
@@ -32,16 +32,13 @@ dofile('regexp/headers.lua')
dofile('regexp/lotto.lua')
dofile('regexp/fraud.lua')
dofile('regexp/drugs.lua')
+dofile('html.lua')
local reconf = config['regexp']
local util = require "rspamd_util"
-- Uncategorized rules
-local html_length_1024_1536 = 'has_content_part_len(\'text\', \'html\', 1024, 1536)'
-local html_link_image = '/<img /iPr'
-reconf['HTML_SHORT_LINK_IMG_2'] = string.format('(%s) & (%s)', html_length_1024_1536, html_link_image)
-
-- Local rules
local r_bgcolor = '/BGCOLOR=/iP'
local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iP'
diff --git a/conf/metrics.conf b/conf/metrics.conf
index 95826765e..b3fc30eca 100644
--- a/conf/metrics.conf
+++ b/conf/metrics.conf
@@ -430,9 +430,19 @@ metric {
symbol {
weight = 3.0;
description = "Short html part with a link to an image";
+ name = "HTML_SHORT_LINK_IMG_1";
+ }
+ symbol {
+ weight = 1.0;
+ description = "Short html part with a link to an image";
name = "HTML_SHORT_LINK_IMG_2";
}
symbol {
+ weight = 0.5;
+ description = "Short html part with a link to an image";
+ name = "HTML_SHORT_LINK_IMG_3";
+ }
+ symbol {
weight = 5.0;
description = "Suspicious boundary in header Content-Type";
name = "SUSPICIOUS_BOUNDARY";