Переглянути джерело

Start work on new HTML rules.

tags/1.0.0
Vsevolod Stakhov 9 роки тому
джерело
коміт
34bed7350e
4 змінених файлів з 64 додано та 7 видалено
  1. 53
    0
      conf/lua/html.lua
  2. 0
    3
      conf/lua/regexp/headers.lua
  3. 1
    4
      conf/lua/rspamd.lua
  4. 10
    0
      conf/metrics.conf

+ 53
- 0
conf/lua/html.lua Переглянути файл

@@ -0,0 +1,53 @@
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to you under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at:
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

local reconf = config['regexp']
local rspamd_regexp = require "rspamd_regexp"

-- MIME_HTML_ONLY: messages that have only an HTML part.
-- The rule is registered as an expression string in the regexp rules
-- table (`reconf`, i.e. config['regexp']).
reconf['MIME_HTML_ONLY'] = 'has_only_html_part()'

--- Check whether the message has an HTML text part whose raw length lies in
-- the half-open range [min, max) and which contains an image flagged as
-- `embedded`.
-- @param task  task object; must provide `get_text_parts()`
-- @param min   inclusive lower bound for the part's raw length
-- @param max   exclusive upper bound for the part's raw length
-- @return true as soon as a matching image is found; nothing (nil) otherwise
local function check_html_image(task, min, max)
  -- Treat a missing part list as empty so ipairs() never receives nil.
  local tp = task:get_text_parts() or {}
  for _, p in ipairs(tp) do
    if p:is_html() then
      local hc = p:get_html()
      local len = p:get_raw_length()
      -- Skip the part when no HTML content object is available instead of
      -- raising an "attempt to index a nil value" error in the callback.
      if hc and len >= min and len < max then
        local images = hc:get_images()
        if images then
          for _, i in ipairs(images) do
            -- NOTE(review): the symbol descriptions in metrics.conf talk about
            -- "a link to an image", while this tests the `embedded` flag --
            -- confirm that this is the intended condition.
            if i['embedded'] then
              return true
            end
          end
        end
      end
    end
  end
end

-- Register the HTML_SHORT_LINK_IMG_* symbol callbacks. Each entry is
-- { symbol name, inclusive lower length bound, exclusive upper length bound };
-- every callback forwards the task to check_html_image with its own bounds.
do
  local length_buckets = {
    { 'HTML_SHORT_LINK_IMG_1', 0, 1024 },
    { 'HTML_SHORT_LINK_IMG_2', 1024, 1536 },
    { 'HTML_SHORT_LINK_IMG_3', 1536, 2048 },
  }
  -- ipairs keeps the registration order identical to the listing above.
  for _, bucket in ipairs(length_buckets) do
    local symbol, lower, upper = bucket[1], bucket[2], bucket[3]
    rspamd_config[symbol] = function(task)
      return check_html_image(task, lower, upper)
    end
  end
end

+ 0
- 3
conf/lua/regexp/headers.lua Переглянути файл

@@ -70,10 +70,6 @@ reconf['R_MISSING_CHARSET']= string.format('content_type_is_type(text) & !conten
-- Subject seems to be spam
reconf['R_SAJDING'] = 'Subject=/\\bsajding(?:om|a)?\\b/iH'

reconf['MIME_HTML_ONLY'] = 'has_only_html_part()'


-- Find forged Outlook MUA
-- Yahoo groups messages
local yahoo_bulk = 'Received=/from \\[\\S+\\] by \\S+\\.(?:groups|scd|dcn)\\.yahoo\\.com with NNFMP/H'

+ 1
- 4
conf/lua/rspamd.lua Переглянути файл

@@ -32,16 +32,13 @@ dofile('regexp/headers.lua')
dofile('regexp/lotto.lua')
dofile('regexp/fraud.lua')
dofile('regexp/drugs.lua')
dofile('html.lua')

local reconf = config['regexp']
local util = require "rspamd_util"

-- Uncategorized rules

local html_length_1024_1536 = 'has_content_part_len(\'text\', \'html\', 1024, 1536)'
local html_link_image = '/<img /iPr'
reconf['HTML_SHORT_LINK_IMG_2'] = string.format('(%s) & (%s)', html_length_1024_1536, html_link_image)

-- Local rules
local r_bgcolor = '/BGCOLOR=/iP'
local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iP'

+ 10
- 0
conf/metrics.conf Переглянути файл

@@ -430,8 +430,18 @@ metric {
symbol {
weight = 3.0;
description = "Short html part with a link to an image";
name = "HTML_SHORT_LINK_IMG_1";
}
symbol {
weight = 1.0;
description = "Short html part with a link to an image";
name = "HTML_SHORT_LINK_IMG_2";
}
symbol {
weight = 0.5;
description = "Short html part with a link to an image";
name = "HTML_SHORT_LINK_IMG_3";
}
symbol {
weight = 5.0;
description = "Suspicious boundary in header Content-Type";

Завантаження…
Відмінити
Зберегти