diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-23 16:11:49 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-23 16:11:49 +0100 |
commit | 34bed7350efbdd0a3b135b4b7dbf508bdbae9c1a (patch) | |
tree | f081d655fae27a88a97914e236f2982057ee9d59 /conf | |
parent | a3cfc0f8bde8975daf7f448ca19e31cf245fe7c1 (diff) | |
download | rspamd-34bed7350efbdd0a3b135b4b7dbf508bdbae9c1a.tar.gz rspamd-34bed7350efbdd0a3b135b4b7dbf508bdbae9c1a.zip |
Start work on new HTML rules.
Diffstat (limited to 'conf')
-rw-r--r-- | conf/lua/html.lua | 53 | ||||
-rw-r--r-- | conf/lua/regexp/headers.lua | 4 | ||||
-rw-r--r-- | conf/lua/rspamd.lua | 5 | ||||
-rw-r--r-- | conf/metrics.conf | 10 |
4 files changed, 64 insertions, 8 deletions
diff --git a/conf/lua/html.lua b/conf/lua/html.lua
new file mode 100644
index 000000000..bd7abd987
--- /dev/null
+++ b/conf/lua/html.lua
@@ -0,0 +1,72 @@
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to you under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at:
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+local reconf = config['regexp']
+local rspamd_regexp = require "rspamd_regexp"
+
+-- Messages that have only HTML part
+reconf['MIME_HTML_ONLY'] = 'has_only_html_part()'
+
+-- Tell whether any HTML text part of the message has a raw length in
+-- [min, max) and contains at least one image flagged as `embedded`.
+--
+-- task: task object; must provide get_text_parts()
+-- min:  inclusive lower bound on the part's raw length
+-- max:  exclusive upper bound on the part's raw length
+--
+-- Returns true on the first matching image, false otherwise.
+--
+-- NOTE(review): the symbols are named *_LINK_IMG_*, yet the test below is on
+-- the `embedded` flag -- confirm this is the intended image property.
+local function check_html_image(task, min, max)
+  -- `or {}` keeps ipairs() from raising should no parts table come back
+  for _, part in ipairs(task:get_text_parts() or {}) do
+    if part:is_html() then
+      local len = part:get_raw_length()
+
+      if len >= min and len < max then
+        local html = part:get_html()
+        -- guard both lookups: calling a method on nil would abort the rule
+        local images = html and html:get_images()
+
+        if images then
+          for _, img in ipairs(images) do
+            if img['embedded'] then
+              return true
+            end
+          end
+        end
+      end
+    end
+  end
+
+  return false
+end
+
+-- Raw-length buckets, one symbol per bucket. Each range is [min, max), so
+-- the buckets are contiguous and never overlap.
+local short_html_buckets = {
+  { 'HTML_SHORT_LINK_IMG_1', 0, 1024 },
+  { 'HTML_SHORT_LINK_IMG_2', 1024, 1536 },
+  { 'HTML_SHORT_LINK_IMG_3', 1536, 2048 },
+}
+
+for _, bucket in ipairs(short_html_buckets) do
+  local name, min, max = bucket[1], bucket[2], bucket[3]
+
+  rspamd_config[name] = function(task)
+    return check_html_image(task, min, max)
+  end
+end
\ No newline at end of file diff --git a/conf/lua/regexp/headers.lua b/conf/lua/regexp/headers.lua index e6f079e86..e8bc7af44 100644 --- a/conf/lua/regexp/headers.lua +++ b/conf/lua/regexp/headers.lua @@ -70,10 +70,6 @@ reconf['R_MISSING_CHARSET']= string.format('content_type_is_type(text) & !conten -- Subject seems to be spam reconf['R_SAJDING'] = 'Subject=/\\bsajding(?:om|a)?\\b/iH' --- Messages that have only HTML part -reconf['MIME_HTML_ONLY'] = 'has_only_html_part()' - - -- Find forged Outlook MUA -- Yahoo groups messages local yahoo_bulk = 'Received=/from \\[\\S+\\] by \\S+\\.(?:groups|scd|dcn)\\.yahoo\\.com with NNFMP/H' diff --git a/conf/lua/rspamd.lua b/conf/lua/rspamd.lua index df480a72c..bb1709c69 100644 --- a/conf/lua/rspamd.lua +++ b/conf/lua/rspamd.lua @@ -32,16 +32,13 @@ dofile('regexp/headers.lua') dofile('regexp/lotto.lua') dofile('regexp/fraud.lua') dofile('regexp/drugs.lua') +dofile('html.lua') local reconf = config['regexp'] local util = require "rspamd_util" -- Uncategorized rules -local html_length_1024_1536 = 'has_content_part_len(\'text\', \'html\', 1024, 1536)' -local html_link_image = '/<img /iPr' -reconf['HTML_SHORT_LINK_IMG_2'] = string.format('(%s) & (%s)', html_length_1024_1536, html_link_image) - -- Local rules local r_bgcolor = '/BGCOLOR=/iP' local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iP' diff --git a/conf/metrics.conf b/conf/metrics.conf index 95826765e..b3fc30eca 100644 --- a/conf/metrics.conf +++ b/conf/metrics.conf @@ -430,9 +430,19 @@ metric { symbol { weight = 3.0; description = "Short html part with a link to an image"; + name = "HTML_SHORT_LINK_IMG_1"; + } + symbol { + weight = 1.0; + description = "Short html part with a link to an image"; name = "HTML_SHORT_LINK_IMG_2"; } symbol { + weight = 0.5; + description = "Short html part with a link to an image"; + name = "HTML_SHORT_LINK_IMG_3"; + } + symbol { weight = 5.0; description = "Suspicious boundary in header Content-Type"; name = "SUSPICIOUS_BOUNDARY"; |