@@ -0,0 +1,466 @@ | |||
--[[ | |||
Copyright (c) 2011-2015, Vsevolod Stakhov <vsevolod@highsecure.ru> | |||
Copyright (c) 2013-2015, Alexey Savelyev <info@homeweb.ru> | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are met: | |||
1. Redistributions of source code must retain the above copyright notice, this | |||
list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright notice, | |||
this list of conditions and the following disclaimer in the documentation | |||
and/or other materials provided with the distribution. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
]]-- | |||
-- Weight for checks_hellohost and checks_hello: 5 - very hard, 4 - hard, 3 - medium, 2 - low, 1 - very low.
-- When HFILTER_HELO_* and HFILTER_HOSTNAME_* checks both trigger, only the symbol with the maximum weight is inserted.
--local dumper = require 'pl.pretty'.dump | |||
local rspamd_regexp = require "rspamd_regexp" | |||
-- Host-name fragments mapped to a weight (1..5). The table is applied to both the
-- HELO string and the reverse host name of the client.
-- The DSL rows use the class [ax] ("adsl"/"xdsl"); the previous [a|x] spelling also
-- accepted a literal '|'.
local checks_hellohost = {
  ['[.-]gprs[.-]'] = 5, ['gprs[.-][0-9]'] = 5, ['[0-9][.-]?gprs'] = 5,
  ['[.-]cdma[.-]'] = 5, ['cdma[.-][0-9]'] = 5, ['[0-9][.-]?cdma'] = 5,
  ['[.-]homeuser[.-]'] = 5, ['homeuser[.-][0-9]'] = 5, ['[0-9][.-]?homeuser'] = 5,
  ['[.-]dhcp[.-]'] = 5, ['dhcp[.-][0-9]'] = 5, ['[0-9][.-]?dhcp'] = 5,
  ['[.-]catv[.-]'] = 5, ['catv[.-][0-9]'] = 5, ['[0-9][.-]?catv'] = 5,
  ['[.-]wifi[.-]'] = 5, ['wifi[.-][0-9]'] = 5, ['[0-9][.-]?wifi'] = 5,
  ['[.-]dial-?up[.-]'] = 5, ['dial-?up[.-][0-9]'] = 5, ['[0-9][.-]?dial-?up'] = 5,
  ['[.-]dynamic[.-]'] = 5, ['dynamic[.-][0-9]'] = 5, ['[0-9][.-]?dynamic'] = 5,
  ['[.-]dyn[.-]'] = 5, ['dyn[.-][0-9]'] = 5, ['[0-9][.-]?dyn'] = 5,
  ['[.-]clients?[.-]'] = 5, ['clients?[.-][0-9]'] = 5, ['[0-9][.-]?clients?'] = 5,
  ['[.-]dynip[.-]'] = 5, ['dynip[.-][0-9]'] = 5, ['[0-9][.-]?dynip'] = 5,
  ['[.-]broadband[.-]'] = 5, ['broadband[.-][0-9]'] = 5, ['[0-9][.-]?broadband'] = 5,
  ['[.-]broad[.-]'] = 5, ['broad[.-][0-9]'] = 5, ['[0-9][.-]?broad'] = 5,
  ['[.-]bredband[.-]'] = 5, ['bredband[.-][0-9]'] = 5, ['[0-9][.-]?bredband'] = 5,
  ['[.-]nat[.-]'] = 5, ['nat[.-][0-9]'] = 5, ['[0-9][.-]?nat'] = 5,
  ['[.-]pptp[.-]'] = 5, ['pptp[.-][0-9]'] = 5, ['[0-9][.-]?pptp'] = 5,
  ['[.-]pppoe[.-]'] = 5, ['pppoe[.-][0-9]'] = 5, ['[0-9][.-]?pppoe'] = 5,
  ['[.-]ppp[.-]'] = 5, ['ppp[.-][0-9]'] = 5, ['[0-9][.-]?ppp'] = 5,
  ['[.-]modem[.-]'] = 5, ['modem[.-][0-9]'] = 5, ['[0-9][.-]?modem'] = 5,
  ['[.-]cablemodem[.-]'] = 5, ['cablemodem[.-][0-9]'] = 5, ['[0-9][.-]?cablemodem'] = 5,
  ['[.-]comcast[.-]'] = 5, ['comcast[.-][0-9]'] = 5, ['[0-9][.-]?comcast'] = 5,
  ['[.-][ax]?dsl-dynamic[.-]'] = 5, ['[ax]?dsl-dynamic[.-]?[0-9]'] = 5, ['[0-9][.-]?[ax]?dsl-dynamic'] = 5,
  ['[.-][ax]?dsl[.-]'] = 4, ['[ax]?dsl[.-]?[0-9]'] = 4, ['[0-9][.-]?[ax]?dsl'] = 4,
  ['[.-][ax]?dsl-line[.-]'] = 4, ['[ax]?dsl-line[.-]?[0-9]'] = 4, ['[0-9][.-]?[ax]?dsl-line'] = 4,
  ['[.-]in-?addr[.-]'] = 4, ['in-?addr[.-][0-9]'] = 4, ['[0-9][.-]?in-?addr'] = 4,
  ['[.-]pool[.-]'] = 4, ['pool[.-][0-9]'] = 4, ['[0-9][.-]?pool'] = 4,
  ['[.-]fibertel[.-]'] = 4, ['fibertel[.-][0-9]'] = 4, ['[0-9][.-]?fibertel'] = 4,
  ['[.-]fbx[.-]'] = 4, ['fbx[.-][0-9]'] = 4, ['[0-9][.-]?fbx'] = 4,
  ['[.-]unused-addr[.-]'] = 3, ['unused-addr[.-][0-9]'] = 3, ['[0-9][.-]?unused-addr'] = 3,
  ['[.-]cable[.-]'] = 3, ['cable[.-][0-9]'] = 3, ['[0-9][.-]?cable'] = 3,
  ['[.-]kabel[.-]'] = 3, ['kabel[.-][0-9]'] = 3, ['[0-9][.-]?kabel'] = 3,
  ['[.-]host[.-]'] = 2, ['host[.-][0-9]'] = 2, ['[0-9][.-]?host'] = 2,
  ['[.-]customers?[.-]'] = 1, ['customers?[.-][0-9]'] = 1, ['[0-9][.-]?customers?'] = 1,
  ['[.-]user[.-]'] = 1, ['user[.-][0-9]'] = 1, ['[0-9][.-]?user'] = 1,
  ['[.-]peer[.-]'] = 1, ['peer[.-][0-9]'] = 1, ['[0-9][.-]?peer'] = 1
}
-- HELO strings that cannot be a public host name (regexp -> weight).
local checks_hello = {
  ['^[^\\.]+$'] = 5, -- for helo=COMPUTER, ANNA, etc... Without dot in helo
  ['localhost$'] = 5,
  ['^(dsl)?(device|speedtouch)\\.lan$'] = 5,
  -- the dot of "in-addr.arpa" is escaped so it only matches a literal dot
  ['\\.(lan|local|home|localdomain|intra|in-addr\\.arpa|priv|online|user|veloxzon)$'] = 5
}
-- HELO strings that are addresses from reserved/special ranges (regexp -> weight).
local checks_hello_badip = {
  ['^0\\.'] = 5, ['^::1$'] = 5, --loopback ipv4, ipv6
  ['^127\\.'] = 5, ['^10\\.'] = 5, ['^192\\.168\\.'] = 5, --local ipv4
  ['^172\\.1[6-9]\\.'] = 5, ['^172\\.2[0-9]\\.'] = 5, ['^172\\.3[01]\\.'] = 5, --local ipv4
  ['^169\\.254\\.'] = 5, --link-local ipv4
  ['^192\\.0\\.0\\.'] = 5, --IETF Protocol
  ['^192\\.88\\.99\\.'] = 5, --RFC3068
  -- RFC6598 shared address space, 100.64.0.0 - 100.127.255.255
  ['^100\\.6[4-9]\\.'] = 5, ['^100\\.[7-9]\\d\\.'] = 5, ['^100\\.1[01]\\d\\.'] = 5, ['^100\\.12[0-7]\\.'] = 5,
  ['^\\d+\\.\\d+\\.\\d+\\.255$'] = 5, --ipv4 with last octet 255 (octets may have several digits)
  ['^192\\.0\\.2\\.'] = 5, ['^198\\.51\\.100\\.'] = 5, ['^203\\.0\\.113\\.'] = 5, --sample
  ['^fe[89ab][0-9a-f]::'] = 5, ['^fe[c-f][0-9a-f]:'] = 5, --local ipv6 (fe80:: - febf::, fec0:: - feff::)
  ['^2001:db8::'] = 5, --reserved RFC 3849 for ipv6
  ['^fc00::'] = 5, ['^ff[0-9a-f]{2}:'] = 5 --unicast, multicast ipv6
}
-- HELO strings that are just an address (list of regexps, no weights).
local checks_hello_bareip = {
  '^\\d+[x.-]\\d+[x.-]\\d+[x.-]\\d+$', --bareip ipv4,
  '^[0-9a-f]+:' --bareip ipv6
}
-- Switches for the individual check groups; all of them start out disabled.
local config = {
  helo_enabled = false,
  hostname_enabled = false,
  from_enabled = false,
  rcpt_enabled = false,
  mid_enabled = false,
  url_enabled = false,
}
-- Matches `str` against the regexp obtained from rspamd_regexp.create_cached for
-- `regexp_text` with the 'i' flag. Always returns a plain boolean.
local function check_regexp(str, regexp_text)
  local compiled = rspamd_regexp.create_cached(regexp_text, 'i')
  return compiled:match(str) and true or false
end
-- Splits `str` on the Lua pattern `delim` and returns the pieces as an array.
-- maxNb: maximum number of pieces to return; nil or a value below 1 means no limit.
-- When the limit is reached, the unsplit remainder of `str` is dropped.
local function split(str, delim, maxNb)
  -- Nothing to split on: the whole string is the only field
  if not string.find(str, delim) then
    return { str }
  end

  local limit = maxNb
  if limit == nil or limit < 1 then
    limit = 0 -- No limit
  end

  local fields = {}
  local count = 0
  local tail_start
  -- Each match yields the text before the delimiter and the position right after it
  for piece, next_pos in string.gmatch(str, "(.-)" .. delim .. "()") do
    count = count + 1
    fields[count] = piece
    tail_start = next_pos
    if count == limit then
      break
    end
  end

  -- Whatever follows the last delimiter is the final field
  if count ~= limit then
    fields[count + 1] = string.sub(str, tail_start)
  end

  return fields
end
-- Returns true when `domain` matches the FQDN regexp below (4..253 characters overall,
-- dot-separated labels of up to 63 characters that neither start nor end with '-'),
-- false otherwise.
local function check_fqdn(domain)
  local fqdn_re = '(?=^.{4,253}$)(^((?!-)[a-zA-Z0-9-]{1,63}(?<!-)\\.)+[a-zA-Z0-9-]{2,63}\\.?$)'
  return check_regexp(domain, fqdn_re) and true or false
end
-- Checks that `host` is a resolvable FQDN and inserts HFILTER_<symbol_suffix>_* symbols
-- into the task when it is not.
-- task: task the symbols are inserted into and whose resolver is used
-- host: host for check
-- symbol_suffix: suffix for symbol
-- eq_ip: ip for comparing or empty string
-- eq_host: host for comparing or empty string
-- Returns false when `host` is nil, true otherwise; lookup results are reported from
-- the resolver callbacks.
local function check_host(task, host, symbol_suffix, eq_ip, eq_host)
  -- A lookup for an MX target: flag the host when the MX name has no A record
  local function check_host_cb_mx_a(resolver, to_resolve, results, err)
    task:inc_dns_req()
    if not results then
      task:insert_result('HFILTER_' .. symbol_suffix .. '_NORESOLVE_MX', 1.0)
    end
  end

  -- MX lookup (issued only after the A lookup failed): no MX either means the host
  -- has neither record; otherwise every MX name gets its own A lookup
  local function check_host_cb_mx(resolver, to_resolve, results, err)
    task:inc_dns_req()
    if not results then
      task:insert_result('HFILTER_' .. symbol_suffix .. '_NORES_A_OR_MX', 1.0)
    else
      for _,mx in pairs(results) do
        if mx['name'] then
          task:get_resolver():resolve_a(task:get_session(), task:get_mempool(), mx['name'], check_host_cb_mx_a)
        end
      end
    end
  end

  -- A lookup for the host itself: fall back to MX when there is no A record; when
  -- eq_ip is given, one of the returned addresses has to equal it
  local function check_host_cb_a(resolver, to_resolve, results, err)
    task:inc_dns_req()
    if not results then
      task:get_resolver():resolve_mx(task:get_session(), task:get_mempool(), host, check_host_cb_mx)
    elseif eq_ip ~= '' then
      for _,result in pairs(results) do
        if result:to_string() == eq_ip then
          return true
        end
      end
      task:insert_result('HFILTER_' .. symbol_suffix .. '_IP_A', 1.0)
    end
  end

  if not host then
    return false
  end
  host = string.lower(host)
  eq_host = eq_host and string.lower(eq_host) or ''

  if check_fqdn(host) then
    -- Skip the lookup only when the host equals the supplied comparison host.
    -- The former test (eq_host ~= 'unknown' or eq_host ~= host) was true for every
    -- input, so this shortcut could never apply.
    if eq_host == '' or eq_host == 'unknown' or eq_host ~= host then
      task:get_resolver():resolve_a(task:get_session(), task:get_mempool(), host, check_host_cb_a)
    end
  else
    task:insert_result('HFILTER_' .. symbol_suffix .. '_NOT_FQDN', 1.0)
  end

  return true
end
-- | |||
-- Returns the largest weight among the patterns of `checks` (regexp -> weight) that
-- match `str`, or `floor` when no matching pattern outweighs it. Every candidate is
-- considered because pairs() visits keys in unspecified order, so stopping at the
-- first hit would yield an arbitrary weight instead of the maximum.
local function max_matching_weight(str, checks, floor)
  local best = floor
  for regexp, weight in pairs(checks) do
    -- Run the regexp only when it could actually raise the result
    if weight > best and check_regexp(str, regexp) then
      best = weight
    end
  end
  return best
end

-- Symbol callback: runs every check group enabled in `config` against `task` and
-- inserts the matching HFILTER_* symbols. Always returns false.
-- Only the URL checks run for authenticated users.
local function hfilter(task)
  -- Links checks
  if config['url_enabled'] then
    local parts = task:get_text_parts()
    if parts then
      local total_parts_len = 0
      local text_parts_count = 0
      local selected_text_part = nil
      for _,p in ipairs(parts) do
        total_parts_len = total_parts_len + p:get_length()
        if not p:is_html() then
          text_parts_count = text_parts_count + 1
          selected_text_part = p
        end
      end

      if total_parts_len > 0 then
        local urls = task:get_urls()
        if urls then
          local total_url_len = 0
          for _,url in ipairs(urls) do
            total_url_len = total_url_len + url:get_length()
          end
          if total_url_len > 0 then
            if total_url_len + 7 > total_parts_len then
              -- The URLs make up (nearly) the whole text
              task:insert_result('HFILTER_URL_ONLY', 1.00)
            elseif text_parts_count == 1 and selected_text_part
                and selected_text_part:get_length() < 1024 then
              if selected_text_part:get_lines_count() < 2 then
                task:insert_result('HFILTER_URL_ONELINE', 1.00)
              end
            end
          end
        end
      end
    end
  end

  --No more checks for auth user
  if task:get_user() ~= nil then
    return false
  end

  local ip = false
  local rip = task:get_from_ip()
  if rip and rip:is_valid() then
    ip = rip:to_string()
  end

  -- Fetched up front: the HELO check compares against it as well. It used to be
  -- declared only inside the HOSTNAME section, so the HELO section read an
  -- undefined global and always passed nil to check_host.
  local hostname = task:get_hostname()

  -- Check's HELO
  local weight_helo = 0
  if config['helo_enabled'] then
    local helo = task:get_helo()
    if helo then
      -- Strip the brackets of address literals such as [1.2.3.4]
      helo = string.gsub(helo, '[%[%]]', '')

      -- Regexp check HELO (checks_hello_badip)
      local find_badip = false
      for regexp,_ in pairs(checks_hello_badip) do
        if check_regexp(helo, regexp) then
          task:insert_result('HFILTER_HELO_BADIP', 1.0)
          find_badip = true
          break
        end
      end

      -- Regexp check HELO (checks_hello_bareip)
      local find_bareip = false
      if not find_badip then
        for _,regexp in pairs(checks_hello_bareip) do
          if check_regexp(helo, regexp) then
            task:insert_result('HFILTER_HELO_BAREIP', 1.0)
            find_bareip = true
            break
          end
        end
      end

      if not find_badip and not find_bareip then
        -- Regexp check HELO (checks_hello, then checks_hellohost): keep the maximum
        weight_helo = max_matching_weight(helo, checks_hello, 0)
        weight_helo = max_matching_weight(helo, checks_hellohost, weight_helo)

        --FQDN check HELO
        if ip and weight_helo == 0 then
          check_host(task, helo, 'HELO', ip, hostname)
        end
      end
    else
      task:insert_result('HFILTER_HELO_UNKNOWN', 1.0)
    end
  end

  -- Check's HOSTNAME
  local weight_hostname = 0
  if config['hostname_enabled'] then
    if hostname and hostname ~= 'unknown' then
      -- Check regexp HOSTNAME
      weight_hostname = max_matching_weight(hostname, checks_hellohost, 0)
    else
      task:insert_result('HFILTER_HOSTNAME_UNKNOWN', 1.00)
    end
  end

  --Insert weight's for HELO or HOSTNAME
  if weight_helo > 0 and weight_helo >= weight_hostname then
    task:insert_result('HFILTER_HELO_' .. weight_helo, 1.0)
  elseif weight_hostname > 0 and weight_hostname > weight_helo then
    task:insert_result('HFILTER_HOSTNAME_' .. weight_hostname, 1.0)
  end

  -- MAILFROM checks --
  local frombounce = false
  if config['from_enabled'] then
    local from = task:get_from(1)
    if from then
      --FROM host check
      for _,fr in ipairs(from) do
        local addr = fr['addr']
        -- split() cannot handle a nil string, so entries without an address are skipped
        if addr then
          local fr_split = split(addr, '@', 0)
          if #fr_split == 2 then
            check_host(task, fr_split[2], 'FROMHOST', '', '')
            if fr_split[1] == 'postmaster' then
              frombounce = true
            end
          end
        end
      end
    else
      task:insert_result('HFILTER_FROM_BOUNCE', 1.00)
      frombounce = true
    end
  end

  -- Recipients checks --
  if config['rcpt_enabled'] then
    local rcpt = task:get_recipients()
    if rcpt and frombounce and #rcpt > 1 then
      task:insert_result('HFILTER_RCPT_BOUNCEMOREONE', 1.00)
    end
  end

  --Message ID host check
  if config['mid_enabled'] then
    local message_id = task:get_message_id()
    if message_id then
      local mid_split = split(message_id, '@', 0)
      if #mid_split == 2 and not string.find(mid_split[2], 'local') then
        check_host(task, mid_split[2], 'MID', '', '')
      end
    end
  end

  return false
end
-- Symbols that get registered for the callback, filled from the groups below.
local symbols_enabled = {}

local symbols_helo = {
  "HFILTER_HELO_BAREIP",
  "HFILTER_HELO_BADIP",
  "HFILTER_HELO_UNKNOWN",
  "HFILTER_HELO_1",
  "HFILTER_HELO_2",
  "HFILTER_HELO_3",
  "HFILTER_HELO_4",
  "HFILTER_HELO_5",
  "HFILTER_HELO_NORESOLVE_MX",
  "HFILTER_HELO_NORES_A_OR_MX",
  "HFILTER_HELO_IP_A",
  "HFILTER_HELO_NOT_FQDN"
}
local symbols_hostname = {
  "HFILTER_HOSTNAME_1",
  "HFILTER_HOSTNAME_2",
  "HFILTER_HOSTNAME_3",
  "HFILTER_HOSTNAME_4",
  "HFILTER_HOSTNAME_5",
  "HFILTER_HOSTNAME_UNKNOWN"
}
local symbols_rcpt = {
  "HFILTER_RCPT_BOUNCEMOREONE"
}
local symbols_mid = {
  "HFILTER_MID_NORESOLVE_MX",
  "HFILTER_MID_NORES_A_OR_MX",
  "HFILTER_MID_NOT_FQDN"
}
local symbols_url = {
  "HFILTER_URL_ONLY",
  "HFILTER_URL_ONELINE"
}
local symbols_from = {
  "HFILTER_FROMHOST_NORESOLVE_MX",
  "HFILTER_FROMHOST_NORES_A_OR_MX",
  "HFILTER_FROMHOST_NOT_FQDN",
  "HFILTER_FROM_BOUNCE"
}

-- Every key of the 'hfilter' options section overrides the built-in defaults
local opts = rspamd_config:get_all_opt('hfilter')
if opts then
  for k,v in pairs(opts) do
    config[k] = v
  end
end

-- Appends all elements of array `a` to array `t`
local function append_t(t, a)
  for _,v in ipairs(a) do
    t[#t + 1] = v
  end
end

-- Config switch -> symbol group, in the order the groups are appended
local symbol_groups = {
  { 'helo_enabled', symbols_helo },
  { 'hostname_enabled', symbols_hostname },
  { 'from_enabled', symbols_from },
  { 'rcpt_enabled', symbols_rcpt },
  { 'mid_enabled', symbols_mid },
  { 'url_enabled', symbols_url },
}
for _,group in ipairs(symbol_groups) do
  if config[group[1]] then
    append_t(symbols_enabled, group[2])
  end
end

-- Register the callback only when at least one group is enabled.
-- `#` is used instead of table.maxn, which is not available in Lua 5.3 and later.
if #symbols_enabled > 0 then
  rspamd_config:register_symbols(hfilter, 1.0, "HFILTER", symbols_enabled)
end
@@ -0,0 +1,76 @@ | |||
-- Licensed to the Apache Software Foundation (ASF) under one or more | |||
-- contributor license agreements. See the NOTICE file distributed with | |||
-- this work for additional information regarding copyright ownership. | |||
-- The ASF licenses this file to you under the Apache License, Version 2.0 | |||
-- (the "License"); you may not use this file except in compliance with | |||
-- the License. You may obtain a copy of the License at: | |||
-- | |||
-- http://www.apache.org/licenses/LICENSE-2.0 | |||
-- | |||
-- Unless required by applicable law or agreed to in writing, software | |||
-- distributed under the License is distributed on an "AS IS" BASIS, | |||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
-- See the License for the specific language governing permissions and | |||
-- limitations under the License. | |||
local reconf = config['regexp'] | |||
local rspamd_regexp = require "rspamd_regexp" | |||
local rspamd_logger = require "rspamd_logger" | |||
-- Rule for messages whose only part is HTML
reconf.MIME_HTML_ONLY = 'has_only_html_part()'
-- Returns true when some HTML text part of `task` has a raw length in [min, max) and
-- contains at least one image flagged as embedded; returns false otherwise.
-- A task without text parts and a part without an HTML context are skipped rather
-- than raising an error.
local function check_html_image(task, min, max)
  local tp = task:get_text_parts()
  if not tp then
    return false
  end

  for _,p in ipairs(tp) do
    if p:is_html() then
      local hc = p:get_html()
      local len = p:get_raw_length()
      if hc and len >= min and len < max then
        local images = hc:get_images()
        if images then
          for _,i in ipairs(images) do
            if i['embedded'] then
              return true
            end
          end
        end
      end
    end
  end

  return false
end
-- HTML_SHORT_LINK_IMG_<n>: check_html_image restricted to one raw-length band each
local html_short_img_bands = {
  { 0, 1024 },
  { 1024, 1536 },
  { 1536, 2048 },
}
for idx,band in ipairs(html_short_img_bands) do
  local lower, upper = band[1], band[2]
  rspamd_config['HTML_SHORT_LINK_IMG_' .. idx] = function(task)
    return check_html_image(task, lower, upper)
  end
end
-- R_EMPTY_IMAGE: true when an HTML part with less than 50 bytes of text contains an
-- image whose height plus width is at least 400; false otherwise.
rspamd_config.R_EMPTY_IMAGE = function(task)
  local tp = task:get_text_parts() -- get text parts in a message
  if not tp then
    return false -- nothing to inspect
  end

  for _,p in ipairs(tp) do -- iterate over text parts array using `ipairs`
    if p:is_html() then -- if the current part is html part
      local hc = p:get_html() -- we get HTML context
      local len = p:get_length() -- and part's length
      if hc and len < 50 then -- if we have a part that has less than 50 bytes of text
        local images = hc:get_images() -- then we check for HTML images
        if images then -- if there are images
          for _,i in ipairs(images) do -- then iterate over images in the part
            -- a missing dimension counts as 0 instead of raising an arithmetic error
            local height = i['height'] or 0
            local width = i['width'] or 0
            if height + width >= 400 then -- if we have a large image
              return true -- add symbol
            end
          end
        end
      end
    end
  end

  return false
end
@@ -0,0 +1,83 @@ | |||
-- Actually these regular expressions were obtained from SpamAssassin project, so they are licensed by apache license: | |||
-- | |||
-- Licensed to the Apache Software Foundation (ASF) under one or more | |||
-- contributor license agreements. See the NOTICE file distributed with | |||
-- this work for additional information regarding copyright ownership. | |||
-- The ASF licenses this file to you under the Apache License, Version 2.0 | |||
-- (the "License"); you may not use this file except in compliance with | |||
-- the License. You may obtain a copy of the License at: | |||
-- | |||
-- http://www.apache.org/licenses/LICENSE-2.0 | |||
-- | |||
-- Unless required by applicable law or agreed to in writing, software | |||
-- distributed under the License is distributed on an "AS IS" BASIS, | |||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
-- See the License for the specific language governing permissions and | |||
-- limitations under the License. | |||
-- | |||
-- Drugs spam (viagra, pills etc) | |||
-- XXX: remove this legacy to statfile | |||
local reconf = config['regexp'] | |||
-- Diet drug names, written so that obfuscated spellings (separators, look-alike
-- characters) still match.
local drugs_diet1 = '/(?:\\b|\\s)[_\\W]{0,3}p[_\\W]{0,3}h[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}n[_\\W]{0,3}t[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}r[_\\W]{0,3}m[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}n[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_diet2 = '/(?:\\b|\\s)_{0,3}[i1!|l\\xEC-\\xEF][_\\W]?o[_\\W]?n[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?n_{0,3}\\b/irP'
local drugs_diet3 = '/\\bbontril\\b/irP'
local drugs_diet4 = '/\\bphendimetrazine\\b/irP'
local drugs_diet5 = '/\\bdiethylpropion\\b/irP'
local drugs_diet6 = '/(?:\\b|\\s)[_\\W]{0,3}M[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}r[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}d[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_diet7 = '/\\b_{0,3}t[_\\W]?[e3\\xE8-\\xEB][_\\W]?n[_\\W]?u[_\\W]?a[_\\W]?t[_\\W]?[e3\\xE8-\\xEB]_{0,3}(?:\\b|\\s)/irP'
-- The class after "r" was unterminated ("[e3\xE8-\xEB[_\W]") and the separator after
-- "r" was mandatory, so the plain spelling could not match; it now follows the same
-- letter/optional-separator scheme as the neighbouring patterns.
local drugs_diet8 = '/\\b_{0,3}d[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?d[_\\W]?r[_\\W]?[e3\\xE8-\\xEB][_\\W]?xx?_{0,3}\\b/irP'
local drugs_diet9 = '/\\b_{0,3}a[_\\W]?d[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?p[_\\W]?[e3\\xE8-\\xEB][_\\W]?x_{0,3}\\b/irP'
local drugs_diet10 = '/\\b_{0,3}x?x[_\\W]?[e3\\xE8-\\xEB][_\\W]?n[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?c[_\\W]?[a4\\xE0-\\xE6@][_\\W]?l_{0,3}\\b/irP'
-- One of the three header rules AND any of the diet patterns
reconf['DRUGS_DIET'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_diet1, drugs_diet2, drugs_diet3, drugs_diet4, drugs_diet5, drugs_diet6, drugs_diet7, drugs_diet8, drugs_diet9, drugs_diet10)
-- Erectile-dysfunction drug names, including obfuscated spellings
local drugs_erectile1 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[ij1!|l\\xEC\\xED\\xEE\\xEF][_\\W]{0,3}[a40\\xE0-\\xE6@][_\\W]{0,3}[xyz]?[gj][_\\W]{0,3}r[_\\W]{0,3}[a40\\xE0-\\xE6@][_\\W]{0,3}x?[_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_erectile2 = '/\\bV(?:agira|igara|iaggra|iaegra)\\b/irP'
local drugs_erectile3 = '/(?:\\A|[\\s\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\x7f])[_\\W]{0,3}C[_\\W]{0,3}[ij1!|l\\xEC\\xED\\xEE\\xEF][_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}l?[l!|1][_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}s[_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_erectile4 = '/\\bC(?:alis|ilias|ilais)\\b/irP'
local drugs_erectile5 = '/\\b_{0,3}s[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?l[_\\W]?d[_\\W]?[e3\\xE8-\\xEB][_\\W]?n[_\\W]?[a4\\xE0-\\xE6@][_\\W]?f[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?l c[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?t[_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?t[_\\W]?[e3\\xE8-\\xEB]_{0,3}(?:\\b|\\s)/irP'
local drugs_erectile6 = '/\\b_{0,3}L[_\\W]?[e3\\xE8-\\xEB][_\\W]?(?:\\\\\\/|V)[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?t[_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?(?:\\b|\\s)/irP'
local drugs_erectile8 = '/\\b_{0,3}T[_\\W]?[a4\\xE0-\\xE6@][_\\W]?d[_\\W]?[a4\\xE0-\\xE6@][_\\W]?l[_\\W]?[a4\\xE0-\\xE6@][_\\W]?f[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?l_{0,3}\\b/irP'
local drugs_erectile10 = '/\\b_{0,3}V[_\\W]?(?:i|\\ï\\;)[_\\W]?(?:a|\\à|\\å)\\;?[_\\W]?g[_\\W]?r[_\\W]?(?:a|\\à|\\å)\\b/irP'
local drugs_erectile11 = '/(?:\\b|\\s)_{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}p[_\\W]{0,3}c[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}[l!|1][_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}s_{0,3}\\b/irP'
-- The patterns joined as "p1) | (p2) | ... | (pN"; the format string below supplies
-- the outermost parentheses
local drugs_erectile_any = table.concat({
  drugs_erectile1, drugs_erectile2, drugs_erectile3,
  drugs_erectile4, drugs_erectile5, drugs_erectile6,
  drugs_erectile8, drugs_erectile10, drugs_erectile11,
}, ') | (')
-- One of the three header rules AND any of the erectile patterns
reconf['DRUGS_ERECTILE'] = string.format('((%s) | (%s) | (%s)) & ((%s))',
  reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'],
  drugs_erectile_any)
-- Anxiety drug names, including obfuscated spellings
local drugs_anxiety1 = '/(?:\\b|\\s)[_\\W]{0,3}x?x[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}n[_\\W]{0,3}[ea4\\xE1\\xE2\\xE3@][_\\W]{0,3}xx?_{0,3}\\b/irP'
local drugs_anxiety2 = '/\\bAlprazolam\\b/irP'
local drugs_anxiety3 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}[l|][_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[u\\xB5\\xF9-\\xFC][_\\W]{0,3}m\\b/irP'
local drugs_anxiety4 = '/\\b_{0,3}D[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?[a4\\xE0-\\xE6@][_\\W]?z[_\\W]?[ea3\\xE9\\xEA\\xEB][_\\W]?p[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m_{0,3}\\b/irP'
local drugs_anxiety5 = '/(?:\\b|\\s)[a4\\xE0-\\xE6@][_\\W]?t[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?v[_\\W]?[a4\\xE0-\\xE6@][_\\W]?n_{0,3}\\b/irP'
local drugs_anxiety6 = '/\\b_{0,3}l[_\\W]?[o0\\xF2-\\xF6][_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?z[_\\W]?[e3\\xE8-\\xEB][_\\W]?p[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m_{0,3}\\b/irP'
local drugs_anxiety7 = '/\\b_{0,3}c[_\\W]?l[_\\W]?[o0\\xF2-\\xF6][_\\W]?n[_\\W]?[a4\\xE0-\\xE6@][_\\W]?z[_\\W]?e[_\\W]?p[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m\\b/irP'
local drugs_anxiety8 = '/\\bklonopin\\b/irP'
local drugs_anxiety9 = '/\\brivotril\\b/irP'
-- The patterns joined as "p1) | (p2) | ... | (pN"; the format string below supplies
-- the outermost parentheses
local drugs_anxiety_any = table.concat({
  drugs_anxiety1, drugs_anxiety2, drugs_anxiety3,
  drugs_anxiety4, drugs_anxiety5, drugs_anxiety6,
  drugs_anxiety7, drugs_anxiety8, drugs_anxiety9,
}, ') | (')
-- One of the three header rules AND any of the anxiety patterns
reconf['DRUGS_ANXIETY'] = string.format('((%s) | (%s) | (%s)) & ((%s))',
  reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'],
  drugs_anxiety_any)
-- Both the erectile and the anxiety rule have to hit
reconf['DRUGS_ANXIETY_EREC'] = string.format('(%s) & (%s)',
  reconf['DRUGS_ERECTILE'], reconf['DRUGS_ANXIETY'])
-- Pain-killer names, including obfuscated spellings
local drugs_pain1 = '/\\b_{0,3}h[_\\W]?y[_\\W]?d[_\\W]?r[_\\W]?[o0\\xF2-\\xF6][_\\W]?c[_\\W]?[o0\\xF2-\\xF6][_\\W]?d[_\\W]?[o0\\xF2-\\xF6][_\\W]?n[_\\W]?e_{0,3}\\b/irP'
local drugs_pain2 = '/\\b_{0,3}c[o0\\xF2-\\xF6]deine_{0,3}\\b/irP'
local drugs_pain3 = '/(?:\\b|\\s)[_\\W]{0,3}[u\\xB5\\xF9-\\xFC][_\\W]{0,3}l[_\\W]{0,3}t[_\\W]{0,3}r[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}m_{0,3}\\b/irP'
local drugs_pain4 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}c[_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}d[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}ns?[_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_pain5 = '/\\b_{0,3}t[_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m[_\\W]?[a4\\xE0-\\xE6@][_\\W]?d[_\\W]?[o0\\xF2-\\xF6][_\\W]?[l!|1]_{0,3}\\b/irP'
local drugs_pain6 = '/\\b_{0,3}u[_\\W]?l[_\\W]?t[_\\W]?r[_\\W]?a[_\\W]?c[_\\W]?e[_\\W]?t_{0,3}\\b/irP'
local drugs_pain7 = '/\\b_{0,3}f[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?[o0\\xF2-\\xF6][_\\W]?r[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?c[_\\W]?[e3\\xE8-\\xEB][_\\W]?[t7]_{0,3}\\b/irP'
local drugs_pain8 = '/\\b_{0,3}c[_\\W]?[e3\\xE8-\\xEB][_\\W]?l[_\\W]?[e3\\xE8-\\xEB][_\\W]?b[_\\W]?r[_\\W]?[e3\\xE8-\\xEB][_\\W]?x_{0,3}\\b/irP'
local drugs_pain9 = '/(?:\\b|\\s)_{0,3}[i1!|l\\xEC-\\xEF]m[i1!|l\\xEC-\\xEF]tr[e3\\xE8-\\xEB]x_{0,3}\\b/irP'
local drugs_pain10 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}x[_\\W]{0,3}xx?_{0,3}\\b/irP'
local drugs_pain11 = '/\\bzebutal\\b/irP'
local drugs_pain12 = '/\\besgic plus\\b/irP'
local drugs_pain13 = '/\\bD[_\\W]?[a4\\xE0-\\xE6@][_\\W]?r[_\\W]?v[_\\W]?[o0\\xF2-\\xF6][_\\W]?n\\b/irP'
local drugs_pain14 = '/N[o0\\xF2-\\xF6]rc[o0\\xF2-\\xF6]/irP'
-- The patterns joined with the single '|' operator used by all sibling rules. The
-- hand-written format string had a stray '||' before the 13th pattern; building the
-- list with table.concat keeps separators and argument count in step.
local drugs_pain_any = table.concat({
  drugs_pain1, drugs_pain2, drugs_pain3, drugs_pain4, drugs_pain5,
  drugs_pain6, drugs_pain7, drugs_pain8, drugs_pain9, drugs_pain10,
  drugs_pain11, drugs_pain12, drugs_pain13, drugs_pain14,
}, ') | (')
-- One of the three header rules AND any of the pain patterns
local drugs_pain = string.format('((%s) | (%s) | (%s)) & ((%s))',
  reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'],
  drugs_pain_any)
-- Sleeping-pill names, including obfuscated spellings
local drugs_sleep1 = '/(?:\\b|\\s)[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}m[_\\W]{0,3}b[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}n[_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_sleep2 = '/(?:\\b|\\s)[_\\W]{0,3}S[_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}n[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}t[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_sleep3 = '/\\b_{0,3}R[_\\W]?[e3\\xE8-\\xEB][_\\W]?s[_\\W]?t[_\\W]?[o0\\xF2-\\xF6][_\\W]?r[_\\W]?i[_\\W]?l_{0,3}\\b/irP'
local drugs_sleep4 = '/\\b_{0,3}H[_\\W]?[a4\\xE0-\\xE6@][_\\W]?l[_\\W]?c[_\\W]?i[_\\W]?[o0\\xF2-\\xF6][_\\W]?n_{0,3}\\b/irP'
-- Any of the sleep patterns: "(p1) | (p2) | (p3) | (p4)"
local drugs_sleep = '(' .. table.concat({
  drugs_sleep1, drugs_sleep2, drugs_sleep3, drugs_sleep4,
}, ') | (') .. ')'
-- Muscle-relaxant names, including obfuscated spellings
local drugs_muscle1 = '/(?:\\b|\\s)[_\\W]{0,3}s[_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}m[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_muscle2 = '/\\b_{0,3}cycl[o0\\xF2-\\xF6]b[e3\\xE8-\\xEB]nz[a4\\xE0-\\xE6@]pr[i1!|l\\xEC-\\xEF]n[e3\\xE8-\\xEB]_{0,3}(?:\\b|\\s)/irP'
local drugs_muscle3 = '/\\b_{0,3}f[_\\W]?l[_\\W]?[e3\\xE8-\\xEB][_\\W]?x[_\\W]?[e3\\xE8-\\xEB][_\\W]?r[_\\W]?[i1!|l\\xEC-\\xEF]_{0,3}[_\\W]?l_{0,3}\\b/irP'
local drugs_muscle4 = '/\\b_{0,3}z[_\\W]?a[_\\W]?n[_\\W]?a[_\\W]?f[_\\W]?l[_\\W]?e[_\\W]?x_{0,3}\\b/irP'
local drugs_muscle5 = '/\\bskelaxin\\b/irP'
-- The patterns joined as "p1) | (p2) | ... | (pN"; the format string below supplies
-- the outermost parentheses
local drugs_muscle_any = table.concat({
  drugs_muscle1, drugs_muscle2, drugs_muscle3, drugs_muscle4, drugs_muscle5,
}, ') | (')
-- One of the three header rules AND any of the muscle patterns
reconf['DRUGS_MUSCLE'] = string.format('((%s) | (%s) | (%s)) & ((%s))',
  reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'],
  drugs_muscle_any)
-- One of the three header rules AND at least three of the six drug categories
reconf['DRUGS_MANYKINDS'] = string.format(
  '((%s) | (%s) | (%s)) & ((%s) + (%s) + (%s) + (%s) + (%s) + (%s) >= 3)',
  reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'],
  reconf['DRUGS_ERECTILE'], reconf['DRUGS_DIET'], drugs_pain, drugs_sleep,
  reconf['DRUGS_MUSCLE'], reconf['DRUGS_ANXIETY'])
@@ -0,0 +1,74 @@ | |||
-- These regular expressions were obtained from the SpamAssassin project, so they are licensed under the Apache License:
-- | |||
-- Licensed to the Apache Software Foundation (ASF) under one or more | |||
-- contributor license agreements. See the NOTICE file distributed with | |||
-- this work for additional information regarding copyright ownership. | |||
-- The ASF licenses this file to you under the Apache License, Version 2.0 | |||
-- (the "License"); you may not use this file except in compliance with | |||
-- the License. You may obtain a copy of the License at: | |||
-- | |||
-- http://www.apache.org/licenses/LICENSE-2.0 | |||
-- | |||
-- Unless required by applicable law or agreed to in writing, software | |||
-- distributed under the License is distributed on an "AS IS" BASIS, | |||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
-- See the License for the specific language governing permissions and | |||
-- limitations under the License. | |||
-- | |||
-- Fraud messages (Nigeria spam, viagra, etc) | |||
local reconf = config['regexp']

-- Individual advance-fee fraud phrases. Each one is a body regexp; the rules at
-- the bottom count how many distinct phrases occur in a message.
local fraud_dbi = '/(?:\\bdollars?\\b|\\busd(?:ollars)?(?:[0-9]|\\b)|\\bus\\$|\\$[0-9,.]{6,}|\\$[0-9].{0,8}[mb]illion|\\$[0-9.,]{2,10} ?m|\\beuros?\\b|u[.]?s[.]? [0-9.]+ m)/irP'
local fraud_kjv = '/(?:claim|concerning) (?:the|this) money/irP'
local fraud_irj = '/(?:finance|holding|securit(?:ies|y)) (?:company|firm|storage house)/irP'
local fraud_neb = '/(?:government|bank) of nigeria/irP'
local fraud_xjr = '/(?:who was a|as a|an? honest|you being a|to any) foreigner/irP'
local fraud_dpr = '/\\b(?:(?:respond|reply) (?:urgently|immediately)|(?:urgent|immediate|earliest) (?:reply|response))\\b/irP'
local fraud_pts = '/\\b(?:ass?ass?inat(?:ed|ion)|murder(?:e?d)?|kill(?:ed|ing)\\b[^.]{0,99}\\b(?:war veterans|rebels?))\\b/irP'
local fraud_bep = '/\\b(?:bank of nigeria|central bank of|trust bank|apex bank|amalgamated bank)\\b/irP'
local fraud_tdp = '/\\b(?:business partner(?:s|ship)?|silent partner(?:s|ship)?)\\b/irP'
-- "gu[e..]i": the accented variants are written as Latin-1 byte escapes
-- (\xE9 = e-acute, \xE8 = e-grave), the same escape style the other raw-mode
-- regexps of these rule files use, instead of mis-encoded literal characters.
local fraud_gan = '/\\b(?:charles taylor|serena|abacha|gu[e\\xE9\\xE8]i|sese[- ]?seko|kabila)\\b/irP'
local fraud_irt = '/\\b(?:compliments? of the|dear friend|dear sir|yours faithfully|season\'?s greetings)\\b/irP'
local fraud_aon = '/\\b(?:confidential|private|alternate|alternative) (?:(?:e-? *)?mail)\\b/irP'
local fraud_wny = '/\\b(?:disburse?(?:ment)?|incurr?(?:ed)?|remunerr?at(?:ed?|ion)|remm?itt?(?:ed|ance|ing)?)\\b/irP'
local fraud_ipk = '/\\b(?:in|to|visit) your country\\b/irP'
local fraud_qxx = '/\\b(?:my name is|i am) (?:mrs?|engr|barrister|dr|prince(?:ss)?)[. ]/irP'
local fraud_iou = '/\\b(?:no risks?|risk-? *free|free of risks?|100% safe)\\b/irP'
local fraud_ezy = '/\\b(?:of|the) late president\\b/irP'
local fraud_mly = '/\\b(?:reply|respond)\\b[^.]{0,50}\\b(?:to|through)\\b[^.]{0,50}\\@\\b/irP'
local fraud_zfj = '/\\b(?:wife|son|brother|daughter) of the late\\b/irP'
local fraud_kdt = '/\\bU\\.?S\\.?(?:D\\.?)?\\s*(?:\\$\\s*)?(?:\\d+,\\d+,\\d+|\\d+\\.\\d+\\.\\d+|\\d+(?:\\.\\d+)?\\s*milli?on)/irP'
local fraud_ulk = '/\\baffidavits?\\b/irP'
local fraud_bgp = '/\\battached to ticket number\\b/irP'
local fraud_fbi = '/\\bdisburs/irP'
local fraud_jbu = '/\\bforeign account\\b/irP'
local fraud_yww = '/\\bfurnish you with\\b/irP'
local fraud_jyg = '/\\bgive\\s+you .{0,15}(?:fund|money|total|sum|contact|percent)\\b/irP'
local fraud_xvw = '/\\bhonest cooperation\\b/irP'
local fraud_uuy = '/\\blegitimate business(?:es)?\\b/irP'
local fraud_snt = '/\\blocate(?: .{1,20})? extended relative/irP'
local fraud_ltx = '/\\bmilli?on (?:.{1,25} thousand\\s*)?(?:(?:united states|u\\.?s\\.?) dollars|(?i:U\\.?S\\.?D?))\\b/irP'
local fraud_jnb = '/\\boperat(?:e|ing)\\b[^.]{0,99}\\b(?:for(?:ei|ie)gn|off-? ?shore|over-? ?seas?) (?:bank )?accounts?\\b/irP'
local fraud_qfy = '/\\bover-? *(?:invoiced?|cost(?:s|ing)?)\\b/irP'
local fraud_wdr = '/\\bprivate lawyer\\b/irP'
local fraud_wfc = '/\\bsecur(?:e|ing) (?:the )?(?:funds?|monies)\\b/irP'
local fraud_aum = '/\\bthe desk of\\b/irP'
local fraud_mcq = '/\\btransaction\\b.{1,30}\\b(?:magnitude|diplomatic|strict|absolute|secret|confiden(?:tial|ce)|guarantee)/irP'
local fraud_etx = '/\\byour\\b[^.]{0,99}\\b(?:contact (?:details|information)|private (?:e?[- ]?mail|telephone|tel|phone|fax))\\b/irP'
local fraud_pvn = '/as the beneficiary/irP'
local fraud_fvu = '/award notification/irP'
local fraud_ckf = '/computer ballot system/irP'
local fraud_fcw = '/fiduciary agent/irP'
local fraud_mqo = '/foreign (?:business partner|customer)/irP'
local fraud_tcc = '/foreign (?:offshore )?(?:bank|account)/irP'
local fraud_gbw = '/god gives .{1,10}second chance/irP'
local fraud_nrg = '/i am contacting you/irP'
local fraud_rlx = '/lott(?:o|ery) (?:co,?ordinator|international)/irP'
local fraud_axf = '/magnanimity/irP'
local fraud_thj = '/modalit(?:y|ies)/irP'
local fraud_yqv = '/nigerian? (?:national|government)/irP'
local fraud_yja = '/over-invoice/irP'
local fraud_ypo = '/the total sum/irP'
local fraud_uoq = '/vital documents/irP'

-- All 52 phrases, in the order they appear in the generated expressions.
local fraud_phrases = {
  fraud_kjv, fraud_irj, fraud_neb, fraud_xjr, fraud_ezy, fraud_zfj, fraud_kdt,
  fraud_bgp, fraud_fbi, fraud_jbu, fraud_jyg, fraud_xvw, fraud_snt, fraud_ltx,
  fraud_mcq, fraud_pvn, fraud_fvu, fraud_ckf, fraud_fcw, fraud_mqo, fraud_tcc,
  fraud_gbw, fraud_nrg, fraud_rlx, fraud_axf, fraud_thj, fraud_yqv, fraud_yja,
  fraud_ypo, fraud_uoq, fraud_dbi, fraud_bep, fraud_dpr, fraud_qxx, fraud_qfy,
  fraud_pts, fraud_tdp, fraud_gan, fraud_ipk, fraud_aon, fraud_wny, fraud_aum,
  fraud_wfc, fraud_yww, fraud_ulk, fraud_iou, fraud_jnb, fraud_irt, fraud_etx,
  fraud_wdr, fraud_uuy, fraud_mly,
}

-- Build "<header signs> & ((p1) + (p2) + ... + (pN) >= min_hits)".
-- Generating the sum from fraud_phrases guarantees that every phrase is counted
-- and that both rules below stay in sync; a hand-written template with fewer
-- '%s' than arguments would silently drop the surplus phrases.
-- @param min_hits number of distinct phrases required for the rule to fire
-- @return expression string for the regexp module
local function advance_fee_rule(min_hits)
  return string.format('((%s) | (%s) | (%s)) & ((%s) >= %d)',
    reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'],
    table.concat(fraud_phrases, ') + ('), min_hits)
end

reconf['ADVANCE_FEE_2'] = advance_fee_rule(2)
reconf['ADVANCE_FEE_3'] = advance_fee_rule(3)
@@ -0,0 +1,491 @@ | |||
-- These regular expressions were obtained from the SpamAssassin project, so they are licensed under the Apache License:
-- | |||
-- Licensed to the Apache Software Foundation (ASF) under one or more | |||
-- contributor license agreements. See the NOTICE file distributed with | |||
-- this work for additional information regarding copyright ownership. | |||
-- The ASF licenses this file to you under the Apache License, Version 2.0 | |||
-- (the "License"); you may not use this file except in compliance with | |||
-- the License. You may obtain a copy of the License at: | |||
-- | |||
-- http://www.apache.org/licenses/LICENSE-2.0 | |||
-- | |||
-- Unless required by applicable law or agreed to in writing, software | |||
-- distributed under the License is distributed on an "AS IS" BASIS, | |||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
-- See the License for the specific language governing permissions and | |||
-- limitations under the License. | |||
-- | |||
-- Definitions of header regexps | |||
local reconf = config['regexp']
local rspamd_regexp = require "rspamd_regexp"

-- SUBJECT_NEEDS_ENCODING: Subject is not a MIME encoded-word but contains
-- bytes that would require one.
-- Encoded-word markers: "=?<charset>?B?" and "=?<charset>?Q?"
local subject_encoded_b64 = 'Subject=/=\\?\\S+\\?B\\?/iX'
local subject_encoded_qp = 'Subject=/=\\?\\S+\\?Q\\?/iX'
-- Control characters (except TAB, LF, CR), DEL and any byte >= 0x80
local subject_needs_mime = 'Subject=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/X'
reconf['SUBJECT_NEEDS_ENCODING'] = string.format('!(%s) & !(%s) & (%s)', subject_encoded_b64, subject_encoded_qp, subject_needs_mime)

-- No space between display name and address in From (e.g. Some Name<some@host>)
reconf['R_NO_SPACE_IN_FROM'] = 'From=/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/X'

-- MISSING_SUBJECT: Subject header is absent or empty
local has_subject = 'header_exists(Subject)'
local empty_subject = 'Subject=/^$/'
reconf['MISSING_SUBJECT'] = string.format('!(%s) | (%s)', has_subject, empty_subject)

-- R_BAD_CTE_7BIT: a text part declares 7bit transfer encoding but its body
-- contains a byte outside 0x01-0x7f
local r_ctype_text = 'content_type_is_type(text)'
local r_cte_7bit = 'compare_transfer_encoding(7bit)'
local r_body_8bit = '/[^\\x01-\\x7f]/Pr'
reconf['R_BAD_CTE_7BIT'] = string.format('(%s) & (%s) & (%s)', r_ctype_text, r_cte_7bit, r_body_8bit)

-- Missing To header
reconf['MISSING_TO'] = '!raw_header_exists(To)'

-- R_UNDISC_RCPT: To is missing or starts with "undisclosed recipient"
local undisc_rcpt = 'To=/^<?undisclosed[- ]recipient/Hi'
reconf['R_UNDISC_RCPT'] = string.format('(%s) | (%s)', reconf['MISSING_TO'], undisc_rcpt)

-- Missing Message-Id; has_mid is also reused by INVALID_MSGID further down, so
-- derive the negation from it instead of repeating the atom.
local has_mid = 'header_exists(Message-Id)'
reconf['MISSING_MID'] = string.format('!%s', has_mid)
-- Received line of the exact form "from [a.b.c.d] by <host>; <date>"
reconf['R_RCVD_SPAMBOTS'] = 'Received=/^from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by [-.\\w+]{5,255}; [SMTWF][a-z][a-z], [\\s\\d]?\\d [JFMAJSOND][a-z][a-z] \\d{4} \\d{2}:\\d{2}:\\d{2} [-+]\\d{4}$/mH'
-- To looks generated: the display name equals the local part of the address
-- (named group "bt" is back-referenced right after '<')
reconf['R_TO_SEEMS_AUTO'] = 'To=/^\\"?(?<bt>[-.\\w]{1,64})\\"?\\s<\\k<bt>\\@/H'
-- Text part without a charset parameter and not declared as 7bit.
-- Built from the r_ctype_text / r_cte_7bit atoms so the rule cannot drift from
-- R_BAD_CTE_7BIT; the resulting expression string is unchanged.
reconf['R_MISSING_CHARSET'] = string.format('%s & !content_type_has_param(charset) & !%s', r_ctype_text, r_cte_7bit)
-- Subject contains "sajding" / "sajdingom" / "sajdinga"
reconf['R_SAJDING'] = 'Subject=/\\bsajding(?:om|a)?\\b/iH'

-- FORGED_OUTLOOK_HTML: Outlook X-Mailer with an HTML-only message, except for
-- mail relayed through Yahoo groups
local yahoo_bulk = 'Received=/from \\[\\S+\\] by \\S+\\.(?:groups|scd|dcn)\\.yahoo\\.com with NNFMP/H'
local outlook_mua = 'X-Mailer=/^Microsoft Outlook\\b/H'
-- Same atom under a second name (used by FORGED_OUTLOOK_TAGS below)
local any_outlook_mua = outlook_mua
reconf['FORGED_OUTLOOK_HTML'] = string.format('!%s & %s & %s', yahoo_bulk, outlook_mua, 'has_only_html_part()')

-- Recipients are similar to each other; per the original note this only works
-- when there are more than 5 recipients
reconf['SUSPICIOUS_RECIPS'] = 'compare_recipients_distance(0.65)'
-- Recipients list is sorted
reconf['SORTED_RECIPS'] = 'is_recipients_sorted()'
-- A lone 14-84 character alphanumeric token on the last line of a part
-- (anchored with \z), typically appended to confuse statistical filters
reconf['TRACKER_ID'] = '/^[a-z0-9]{6,24}[-_a-z0-9]{2,36}[a-z0-9]{6,24}\\s*\\z/isPr'
-- *_EXCESS_BASE64 / *_EXCESS_QP family.
-- For each of From, To, Reply-To and Cc a rule fires when the header carries a
-- MIME encoded-word ("=?charset?B?" or "=?charset?Q?") although the header has
-- no character that would need encoding.
-- The *_needs_mime atoms match when such a character IS present (control
-- characters other than TAB/LF/CR, DEL, or any byte >= 0x80); the rules negate
-- them.

-- From
local from_encoded_b64 = 'From=/\\=\\?\\S+\\?B\\?/iX'
local from_needs_mime = 'From=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
reconf['FROM_EXCESS_BASE64'] = string.format('%s & !%s', from_encoded_b64, from_needs_mime)
local from_encoded_qp = 'From=/\\=\\?\\S+\\?Q\\?/iX'
reconf['FROM_EXCESS_QP'] = string.format('%s & !%s', from_encoded_qp, from_needs_mime)

-- To
local to_encoded_b64 = 'To=/\\=\\?\\S+\\?B\\?/iX'
local to_needs_mime = 'To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
reconf['TO_EXCESS_BASE64'] = string.format('%s & !%s', to_encoded_b64, to_needs_mime)
local to_encoded_qp = 'To=/\\=\\?\\S+\\?Q\\?/iX'
reconf['TO_EXCESS_QP'] = string.format('%s & !%s', to_encoded_qp, to_needs_mime)

-- Reply-To
local replyto_encoded_b64 = 'Reply-To=/\\=\\?\\S+\\?B\\?/iX'
local replyto_needs_mime = 'Reply-To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
reconf['REPLYTO_EXCESS_BASE64'] = string.format('%s & !%s', replyto_encoded_b64, replyto_needs_mime)
local replyto_encoded_qp = 'Reply-To=/\\=\\?\\S+\\?Q\\?/iX'
reconf['REPLYTO_EXCESS_QP'] = string.format('%s & !%s', replyto_encoded_qp, replyto_needs_mime)

-- Cc
local cc_encoded_b64 = 'Cc=/\\=\\?\\S+\\?B\\?/iX'
local cc_needs_mime = 'Cc=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
reconf['CC_EXCESS_BASE64'] = string.format('%s & !%s', cc_encoded_b64, cc_needs_mime)
local cc_encoded_qp = 'Cc=/\\=\\?\\S+\\?Q\\?/iX'
reconf['CC_EXCESS_QP'] = string.format('%s & !%s', cc_encoded_qp, cc_needs_mime)
-- Detect forged Outlook / Outlook Express headers
-- Outlook Express X-Mailer (versions 4, 5, 6)
local oe_mua = 'X-Mailer=/\\bOutlook Express [456]\\./H'
-- Message-Id shapes accepted for Outlook Express
local oe_msgid_1 = 'Message-Id=/^<?[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\\@hotmail\\.com>?$/mH'
local oe_msgid_2 = 'Message-Id=/^<?(?:[0-9a-f]{8}|[0-9a-f]{12})\\$[0-9a-f]{8}\\$[0-9a-f]{8}\\@\\S+>?$/H'

-- Signs that the Message-Id was produced or rewritten by something other than
-- the MUA, so it cannot be used to judge the MUA
local lyris_ezml_remailer = 'List-Unsubscribe=/<mailto:(?:leave-\\S+|\\S+-unsubscribe)\\@\\S+>$/H'
local wacky_sendmail_version = 'Received=/\\/CWT\\/DCE\\)/H'
local iplanet_messaging_server = 'Received=/iPlanet Messaging Server/H'
-- The closing '>' is optional, mirroring the optional leading '<' and the
-- sibling patterns; the previous "gbl?>$" made the letter 'l' optional instead.
local hotmail_baydav_msgid = 'Message-Id=/^<?BAY\\d+-DAV\\d+[A-Z0-9]{25}\\@phx\\.gbl>?$/H'
local sympatico_msgid = 'Message-Id=/^<?BAYC\\d+-PASMTP\\d+[A-Z0-9]{25}\\@CEZ\\.ICE>?$/H'
local mailman_msgid = 'Message-ID=/^<mailman\\.\\d+\\.\\d+\\.\\d+\\..+\\@\\S+>$/H'
local unusable_msgid = string.format('(%s | %s | %s | %s | %s | %s)',
  lyris_ezml_remailer, wacky_sendmail_version, iplanet_messaging_server,
  hotmail_baydav_msgid, sympatico_msgid, mailman_msgid)

-- Outlook Express X-Mailer without any acceptable Message-Id shape
local forged_oe = string.format('(%s & !%s & !%s & !%s)', oe_mua, oe_msgid_1, oe_msgid_2, unusable_msgid)

-- Outlook (8 / CWS Build 9 / Build 10) X-Mailer and its Message-Id shapes
local outlook_dollars_mua = 'X-Mailer=/^Microsoft Outlook(?: 8| CWS, Build 9|, Build 10)\\./H'
local outlook_dollars_other = 'Message-Id=/^<?\\!\\~\\!>?/H'
local vista_msgid = 'Message-Id=/^<?[A-F\\d]{32}\\@\\S+>?$/H'
local ims_msgid = 'Message-Id=/^<?[A-F\\d]{36,40}\\@\\S+>?$/H'
local forged_outlook_dollars = string.format('(%s & !%s & !%s & !%s & !%s & !%s)',
  outlook_dollars_mua, oe_msgid_2, outlook_dollars_other, vista_msgid, ims_msgid, unusable_msgid)

-- Exact versions excluded from the summary rule. The dots of the version
-- numbers are escaped so that they match only a literal '.'.
local fmo_excl_o3416 = 'X-Mailer=/^Microsoft Outlook, Build 10\\.0\\.3416$/H'
local fmo_excl_oe3790 = 'X-Mailer=/^Microsoft Outlook Express 6\\.00\\.3790\\.3959$/H'

-- Summary rule for forged Outlook
reconf['FORGED_MUA_OUTLOOK'] = string.format('(%s | %s) & !%s & !%s & !%s',
  forged_oe, forged_outlook_dollars, fmo_excl_o3416, fmo_excl_oe3790, vista_msgid)
-- FORGED_OUTLOOK_TAGS: Outlook X-Mailer (not via Yahoo groups) with an HTML
-- part that lacks at least one of the <html>, <head>, <meta>, <body> tags
local mime_html = 'content_type_is_type(text) & content_type_is_subtype(/.?html/)'
local tag_exists_html = 'has_html_tag(html)'
local tag_exists_head = 'has_html_tag(head)'
local tag_exists_meta = 'has_html_tag(meta)'
local tag_exists_body = 'has_html_tag(body)'
reconf['FORGED_OUTLOOK_TAGS'] = string.format('!%s & %s & %s & !(%s & %s & %s & %s)',
  yahoo_bulk, any_outlook_mua, mime_html, tag_exists_html, tag_exists_head,
  tag_exists_meta, tag_exists_body)

-- Forged OE/MSO multipart boundaries: "----=_NextPart_000_" style boundaries
-- carrying one of a few fixed hex groups
reconf['SUSPICIOUS_BOUNDARY'] = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_(00EBFFA4|0102FFA4|32C6FFA4|3302FFA4)\\.[A-Z\\d]{8}"[\\r\\n]*$/siX'
reconf['SUSPICIOUS_BOUNDARY2'] = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_(01C6527E)\\.[A-Z\\d]{8}"[\\r\\n]*$/siX'
-- "-----000-00NN-01Cxxxxx-xxxxxxxx" style boundary
reconf['SUSPICIOUS_BOUNDARY3'] = 'Content-Type=/^\\s*multipart.+boundary="-----000-00\\d\\d-01C[\\dA-F]{5}-[\\dA-F]{8}"[\\r\\n]*$/siX'
-- Boundary with a "01C4xxxx" group combined with a Date in 2005-2019
local suspicious_boundary_01C4 = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_01C4[\\dA-F]{4}\\.[A-Z\\d]{8}"[\\r\\n]*$/siX'
local suspicious_boundary_01C4_date = 'Date=/^\\s*\\w\\w\\w,\\s+\\d+\\s+\\w\\w\\w 20(0[56789]|1\\d)/'
reconf['SUSPICIOUS_BOUNDARY4'] = string.format('(%s) & (%s)', suspicious_boundary_01C4, suspicious_boundary_01C4_date)
-- Detect forged The Bat! headers
-- The Bat! X-Mailer header
local thebat_mua_any = 'X-Mailer=/^\\s*The Bat!/H'
-- Generic "<digits>.<digits>@host" Message-ID shape
local thebat_msgid_common = 'Message-ID=/^<?\\d+\\.\\d+\\@\\S+>?$/mH'
-- Stricter shape: the second number must be a YYYYMMDDhhmmss timestamp
-- (years 1970-2099)
local thebat_msgid = 'Message-ID=/^<?\\d+\\.(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)([0-5]\\d)\\@\\S+>?/mH'
-- The Bat! X-Mailer, generic shape present but timestamp shape not satisfied
reconf['FORGED_MUA_THEBAT_MSGID'] = string.format('(%s) & !(%s) & (%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid)
-- The Bat! X-Mailer with a Message-ID that matches neither shape
reconf['FORGED_MUA_THEBAT_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid)

-- Detect forged KMail headers
-- KMail 1.x User-Agent header
local kmail_mua = 'User-Agent=/^\\s*KMail\\/1\\.\\d+\\.\\d+/H'
-- Generic "<digits>.<digits>.<something>@host" Message-Id shape
local kmail_msgid_common = 'Message-Id=/^<?\\s*\\d+\\.\\d+\\.\\S+\\@\\S+>?$/mH'
--- Check that the Message-Id has the "<timestamp>.<digits>.<address>" form where
-- <address> is the address found between '<' and '>' in the From header.
-- The two header values are joined as "From|Message-Id" so that a single regexp
-- with a back-reference can compare them; the timestamp is YYYYMMDDhhmm.
-- Must stay a global function: FORGED_MUA_KMAIL_MSGID refers to it by name
-- inside its expression string.
-- @param task object providing get_header(name)
-- @return true when both headers exist and the combined string matches,
--         false otherwise
function kmail_msgid (task)
  local header_msgid = task:get_header('Message-Id')
  if not header_msgid then return false end

  local header_from = task:get_header('From')
  if not header_from then return false end

  -- Only look the regexp up once both headers are known to exist
  local regexp_text = '<(\\S+)>\\|(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)\\.\\d+\\.\\1$'
  local re = rspamd_regexp.create_cached(regexp_text)
  if re:match(header_from .. "|" .. header_msgid) then return true end
  return false
end
-- KMail User-Agent, generic Message-Id shape present, but the kmail_msgid()
-- check (timestamp + From address) fails
reconf['FORGED_MUA_KMAIL_MSGID'] = string.format('(%s) & (%s) & !(%s) & !(%s)', kmail_mua, kmail_msgid_common, 'kmail_msgid', unusable_msgid)
-- KMail User-Agent with a Message-Id that does not even have the generic shape
reconf['FORGED_MUA_KMAIL_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s)', kmail_mua, kmail_msgid_common, unusable_msgid)

-- Detect forged Opera Mail headers
-- Opera Mail 10.x / 11.x User-Agent header
local opera1x_mua = 'User-Agent=/^\\s*Opera Mail\\/1[01]\\.\\d+ /H'
-- Opera Mail Message-ID template: "op." followed by 14 lowercase alphanumerics
local opera1x_msgid = 'Message-ID=/^<?op\\.[a-z\\d]{14}\\@\\S+>?$/H'
-- Opera Mail 10.x on Windows User-Agent header
local suspicious_opera10w_mua = 'User-Agent=/^\\s*Opera Mail\\/10\\.\\d+ \\(Windows\\)$/H'
-- KMail-like Message-Id with a 2009 timestamp.
-- NOTE(review): the tail "\S+?>$" requires the closing '>' while the leading
-- '<' is optional; sibling patterns use ">?$". Possibly a typo - confirm before
-- changing, since it alters what the rule matches.
local suspicious_opera10w_msgid = 'Message-Id=/^<?2009\\d{8}\\.\\d+\\.\\S+\\@\\S+?>$/H'
-- Opera 10 (Windows) User-Agent together with the KMail-like Message-Id
reconf['SUSPICIOUS_OPERA_10W_MSGID'] = string.format('(%s) & (%s)', suspicious_opera10w_mua, suspicious_opera10w_msgid)
-- Opera Mail User-Agent with a Message-ID that is neither the Opera template
-- nor the case already reported by SUSPICIOUS_OPERA_10W_MSGID
reconf['FORGED_MUA_OPERA_MSGID'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', opera1x_mua, opera1x_msgid, reconf['SUSPICIOUS_OPERA_10W_MSGID'], unusable_msgid)
-- Detect forged Mozilla Mail / Thunderbird / SeaMonkey headers
-- User-Agent atoms
local user_agent_mozilla5 = 'User-Agent=/^\\s*Mozilla\\/5\\.0/H'
local user_agent_thunderbird = 'User-Agent=/^\\s*(Thunderbird|Mozilla Thunderbird|Mozilla\\/.*Gecko\\/.*Thunderbird\\/)/H'
local user_agent_seamonkey = 'User-Agent=/^\\s*Mozilla\\/5\\.0\\s.+\\sSeaMonkey\\/\\d+\\.\\d+/H'
-- "Plain" Mozilla: Mozilla/5.0 that is neither Thunderbird nor SeaMonkey
local user_agent_mozilla = string.format('(%s) & !(%s) & !(%s)', user_agent_mozilla5, user_agent_thunderbird, user_agent_seamonkey)
-- Generic Mozilla Message-ID shape: 8 hex digits, '.', 1-7 digits, '@', domain
local mozilla_msgid_common = 'Message-ID=/^\\s*<[\\dA-F]{8}\\.\\d{1,7}\\@([^>\\.]+\\.)+[^>\\.]+>$/H'
-- Stricter shape: restricted leading hex byte and a "d0d0...d" digit pattern
local mozilla_msgid = 'Message-ID=/^\\s*<(3[3-9A-F]|4[\\dA-F]|5[\\dA-F])[\\dA-F]{6}\\.(\\d0){1,4}\\d\\@([^>\\.]+\\.)+[^>\\.]+>$/H'

-- For each MUA: "*_MSGID" fires when the generic shape is present but the
-- strict one is not; "*_MSGID_UNKNOWN" fires when neither shape matches.
-- Mozilla Mail
reconf['FORGED_MUA_MOZILLA_MAIL_MSGID'] = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_mozilla, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
reconf['FORGED_MUA_MOZILLA_MAIL_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', user_agent_mozilla, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
-- Thunderbird
reconf['FORGED_MUA_THUNDERBIRD_MSGID'] = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_thunderbird, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
reconf['FORGED_MUA_THUNDERBIRD_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', user_agent_thunderbird, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
-- SeaMonkey
reconf['FORGED_MUA_SEAMONKEY_MSGID'] = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_seamonkey, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
reconf['FORGED_MUA_SEAMONKEY_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', user_agent_seamonkey, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
-- INVALID_MSGID: Message-Id exists but is neither a plain "local@domain" token
-- (no spaces, angle brackets, backslashes or 8bit bytes) nor contains a
-- parenthesised comment
local sane_msgid = 'Message-Id=/^<?[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+>?\\s*$/H'
local msgid_comment = 'Message-Id=/\\(.*\\)/H'
reconf['INVALID_MSGID'] = string.format('(%s) & !((%s) | (%s))', has_mid, sane_msgid, msgid_comment)

-- MIME_HEADER_CTYPE_ONLY: Content-Type is the only MIME header present and the
-- type is not text/plain
local cd = 'header_exists(Content-Disposition)'
local cte = 'header_exists(Content-Transfer-Encoding)'
local ct = 'header_exists(Content-Type)'
local mime_version = 'raw_header_exists(MIME-Version)'
local ct_text_plain = 'content_type_is_type(text) & content_type_is_subtype(plain)'
reconf['MIME_HEADER_CTYPE_ONLY'] = string.format('!(%s) & !(%s) & (%s) & !(%s) & !(%s)', cd, cte, ct, mime_version, ct_text_plain)

-- RATWARE_MS_HASH: "hex$hex$hex@host" Message-Id without the X-MimeOLE header
-- or an Exchange Received line that would normally accompany it
local msgid_dollars_ok = 'Message-Id=/[0-9a-f]{4,}\\$[0-9a-f]{4,}\\$[0-9a-f]{4,}\\@\\S+/H'
local mimeole_ms = 'X-MimeOLE=/^Produced By Microsoft MimeOLE/H'
local rcvd_with_exchange = 'Received=/with Microsoft Exchange Server/H'
reconf['RATWARE_MS_HASH'] = string.format('(%s) & !(%s) & !(%s)', msgid_dollars_ok, mimeole_ms, rcvd_with_exchange)

-- "reply-type=original" parameter in a text/plain Content-Type
reconf['STOX_REPLY_TYPE'] = 'Content-Type=/text\\/plain; .* reply-type=original/H'

-- FM_FAKE_HELO_VERIZON: HELO claims verizon.net but reverse DNS does not
local fhelo_verizon = 'X-Spam-Relays-Untrusted=/^[^\\]]+ helo=[^ ]+verizon\\.net /iH'
local fhost_verizon = 'X-Spam-Relays-Untrusted=/^[^\\]]+ rdns=[^ ]+verizon\\.net /iH'
reconf['FM_FAKE_HELO_VERIZON'] = string.format('(%s) & !(%s)', fhelo_verizon, fhost_verizon)

-- FORGED_MSGID_YAHOO: Message-Id at yahoo.com while From is not
local at_yahoo_msgid = 'Message-Id=/\\@yahoo\\.com\\b/iH'
local at_yahoogroups_msgid = 'Message-Id=/\\@yahoogroups\\.com\\b/iH'
local from_yahoo_com = 'From=/\\@yahoo\\.com\\b/iH'
reconf['FORGED_MSGID_YAHOO'] = string.format('(%s) & !(%s)', at_yahoo_msgid, from_yahoo_com)
-- Dots of the domain names are escaped so that they match only a literal '.'
local r_from_yahoo_groups = 'From=/rambler\\.ru\\@returns\\.groups\\.yahoo\\.com\\b/iH'
local r_from_yahoo_groups_ro = 'From=/ro\\.ru\\@returns\\.groups\\.yahoo\\.com\\b/iH'

-- FORGED_MUA_THEBAT_BOUN: The Bat! v1 X-Mailer with a boundary that does not
-- start with ten dashes (Mailman-processed mail excluded)
local thebat_mua_v1 = 'X-Mailer=/^The Bat! \\(v1\\./H'
local ctype_has_boundary = 'Content-Type=/boundary/iH'
local bat_boundary = 'Content-Type=/boundary=\\"?-{10}/H'
local mailman_21 = 'X-Mailman-Version=/\\d/H'
reconf['FORGED_MUA_THEBAT_BOUN'] = string.format('(%s) & (%s) & !(%s) & !(%s)', thebat_mua_v1, ctype_has_boundary, bat_boundary, mailman_21)

-- RCVD_DOUBLE_IP_SPAM: Received line with bare IP addresses on both the
-- "from" and the "by" side
local double_ip_spam_1 = 'Received=/from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by \\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3} with/H'
local double_ip_spam_2 = 'Received=/from\\s+\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\s+by\\s+\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3};/H'
reconf['RCVD_DOUBLE_IP_SPAM'] = string.format('(%s) | (%s)', double_ip_spam_1, double_ip_spam_2)

-- REPTO_QUOTE_YAHOO: quoted display name in Reply-To on mail that claims Yahoo
-- (From or Message-Id). Reuses from_yahoo_com / at_yahoo_msgid declared above
-- instead of redeclaring identical locals, which only shadowed the first pair
-- and consumed two more of the chunk's limited local-variable slots.
local repto_quote = 'Reply-To=/\\".*\\"\\s*\\</H'
reconf['REPTO_QUOTE_YAHOO'] = string.format('(%s) & ((%s) | (%s))', repto_quote, from_yahoo_com, at_yahoo_msgid)
-- MUA definitions (X-Mailer based)
local xm_gnus = 'X-Mailer=/^Gnus v/H'
local xm_msoe5 = 'X-Mailer=/^Microsoft Outlook Express 5/H'
local xm_msoe6 = 'X-Mailer=/^Microsoft Outlook Express 6/H'
local xm_mso12 = 'X-Mailer=/^Microsoft(?: Office Outlook 12\\.0| Outlook 14\\.0)/H'
local xm_cgpmapi = 'X-Mailer=/^CommuniGate Pro MAPI Connector/H'
local xm_moz4 = 'X-Mailer=/^Mozilla 4/H'
local xm_skyri = 'X-Mailer=/^SKYRiXgreen/H'
local xm_wwwmail = 'X-Mailer=/^WWW-Mail \\d/H'
-- MUA definitions (User-Agent based)
local ua_gnus = 'User-Agent=/^Gnus/H'
local ua_knode = 'User-Agent=/^KNode/H'
local ua_mutt = 'User-Agent=/^Mutt/H'
local ua_pan = 'User-Agent=/^Pan/H'
local ua_xnews = 'User-Agent=/^Xnews/H'
-- Any of the MUAs above except xm_mso12 and xm_cgpmapi
local no_inr_yes_ref = string.format('(%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s)', xm_gnus, xm_msoe5, xm_msoe6, xm_moz4, xm_skyri, xm_wwwmail, ua_gnus, ua_knode, ua_mutt, ua_pan, ua_xnews)
local subj_re = 'Subject=/^R[eE]:/H'
local has_ref = 'header_exists(References)'
local missing_ref = string.format('!(%s)', has_ref)
-- Fake reply: "Re:" in Subject but no References header, sent by one of the
-- listed MUAs (Outlook Express 6 is explicitly excluded again)
reconf['FAKE_REPLY_C'] = string.format('(%s) & (%s) & (%s) & !(%s)', subj_re, missing_ref, no_inr_yes_ref, xm_msoe6)

-- X-MSMail-Priority is present but X-MimeOLE is absent (e.g. fake Outlook or
-- fake Exchange); a few mailers that do this are excluded
local has_msmail_pri = 'header_exists(X-MSMail-Priority)'
local has_mimeole = 'header_exists(X-MimeOLE)'
local has_squirrelmail_in_mailer = 'X-Mailer=/SquirrelMail\\b/H'
local has_ips_php_in_mailer = 'X-Mailer=/^IPS PHP Mailer/'
reconf['MISSING_MIMEOLE'] = string.format('(%s) & !(%s) & !(%s) & !(%s) & !(%s) & !(%s)', has_msmail_pri, has_mimeole, has_squirrelmail_in_mailer, xm_mso12, xm_cgpmapi, has_ips_php_in_mailer)

-- Header delimiters
-- "yandex": a Received line from a yandex host AND a yandex address in From,
-- X-Envelope-From or Return-Path; used as an exclusion by the tab-delimiter
-- rules that follow
local yandex_from = 'From=/\\@(yandex\\.ru|yandex\\.net|ya\\.ru)/iX'
local yandex_x_envelope_from = 'X-Envelope-From=/\\@(yandex\\.ru|yandex\\.net|ya\\.ru)/iX'
local yandex_return_path = 'Return-Path=/\\@(yandex\\.ru|yandex\\.net|ya\\.ru)/iX'
local yandex_received = 'Received=/^\\s*from \\S+\\.(yandex\\.ru|yandex\\.net)/mH'
local yandex = string.format('(%s) & ((%s) | (%s) | (%s))', yandex_received, yandex_from, yandex_x_envelope_from, yandex_return_path)
-- Tabs as delimiters between header names and header values | |||
--- Tell whether any instance of a header has its 'tab_separated' field set.
-- Must stay a global function: the HEADER_*_DELIMITER_TAB expressions refer to
-- it by name.
-- @param task object providing get_header_full(name)
-- @param header_name name of the header to inspect
-- @return true if at least one returned entry has a truthy 'tab_separated'
--         field; false otherwise, including when nothing is returned for the
--         header (ipairs(nil) would raise an error instead)
function check_header_delimiter_tab(task, header_name)
  local headers = task:get_header_full(header_name)
  if not headers then return false end

  for _, rh in ipairs(headers) do
    if rh['tab_separated'] then return true end
  end
  return false
end
-- HEADER_<X>_DELIMITER_TAB: check_header_delimiter_tab() reports a tab-separated
-- instance of the header and the message does not match the "yandex" exclusion.
-- The five rules differ only in symbol and header name, so they are generated
-- from one template, in the same order as before.
for _, rule in ipairs({
  { 'HEADER_FROM_DELIMITER_TAB', 'From' },
  { 'HEADER_TO_DELIMITER_TAB', 'To' },
  { 'HEADER_CC_DELIMITER_TAB', 'Cc' },
  { 'HEADER_REPLYTO_DELIMITER_TAB', 'Reply-To' },
  { 'HEADER_DATE_DELIMITER_TAB', 'Date' },
}) do
  reconf[rule[1]] = string.format('(check_header_delimiter_tab(%s)) & !(%s)', rule[2], yandex)
end
-- Empty delimiters between header names and header values
--
-- Returns true when at least one occurrence of `header_name` in the message
-- has its `empty_separator` field set, false otherwise (including when the
-- header is not present at all).
--
-- task:        object providing get_header_full(name)
-- header_name: name of the header to inspect
--
-- The function is intentionally global: its name appears inside the rule
-- expression strings (presumably resolved by name at rule evaluation time).
function check_header_delimiter_empty(task, header_name)
  local occurrences = task:get_header_full(header_name)
  if not occurrences then
    -- No such header: nothing to iterate, and ipairs(nil) would raise.
    return false
  end
  for _, rh in ipairs(occurrences) do
    if rh['empty_separator'] then
      return true
    end
  end
  return false
end
-- One rule per header: the header name is followed by an empty delimiter.
reconf.HEADER_FROM_EMPTY_DELIMITER = ('(%s)'):format('check_header_delimiter_empty(From)')
reconf.HEADER_TO_EMPTY_DELIMITER = ('(%s)'):format('check_header_delimiter_empty(To)')
reconf.HEADER_CC_EMPTY_DELIMITER = ('(%s)'):format('check_header_delimiter_empty(Cc)')
reconf.HEADER_REPLYTO_EMPTY_DELIMITER = ('(%s)'):format('check_header_delimiter_empty(Reply-To)')
reconf.HEADER_DATE_EMPTY_DELIMITER = ('(%s)'):format('check_header_delimiter_empty(Date)')
-- Definitions of received headers regexp

-- Any byte in the 0x80-0xff range inside a Received header.
reconf.RCVD_ILLEGAL_CHARS = 'Received=/[\\x80-\\xff]/X'

-- mail.ru sender address in the various sender headers.
local MAIL_RU_Return_Path = 'Return-path=/^\\s*<.+\\@mail\\.ru>$/iX'
local MAIL_RU_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@mail\\.ru>$/iX'
local MAIL_RU_From = 'From=/\\@mail\\.ru>?$/iX'
local MAIL_RU_Received = 'Received=/from mail\\.ru \\(/mH'

-- A Received header claims "from mail.ru (" while the message lacks the
-- combination of a mail.ru envelope sender (Return-path or X-Envelope-From)
-- and a mail.ru From address.
reconf.FAKE_RECEIVED_mail_ru = string.format(
  '(%s) & !(((%s) | (%s)) & (%s))',
  MAIL_RU_Received, MAIL_RU_Return_Path, MAIL_RU_X_Envelope_From, MAIL_RU_From
)
-- gmail.com sender address in the various sender headers.
local GMAIL_COM_Return_Path = 'Return-path=/^\\s*<.+\\@gmail\\.com>$/iX'
local GMAIL_COM_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@gmail\\.com>$/iX'
local GMAIL_COM_From = 'From=/\\@gmail\\.com>?$/iX'

-- ukr.net sender address in the various sender headers.
local UKR_NET_Return_Path = 'Return-path=/^\\s*<.+\\@ukr\\.net>$/iX'
local UKR_NET_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@ukr\\.net>$/iX'
local UKR_NET_From = 'From=/\\@ukr\\.net>?$/iX'

-- Received header shapes in which the peer is a bare IP address (or an
-- unresolved host) that announces itself as smtp.yandex.ru.
local RECEIVED_smtp_yandex_ru_1 = 'Received=/from \\[\\d+\\.\\d+\\.\\d+\\.\\d+\\] \\((port=\\d+ )?helo=smtp\\.yandex\\.ru\\)/iX'
local RECEIVED_smtp_yandex_ru_2 = 'Received=/from \\[UNAVAILABLE\\] \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]:\\d+ helo=smtp\\.yandex\\.ru\\)/iX'
local RECEIVED_smtp_yandex_ru_3 = 'Received=/from \\S+ \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]:\\d+ helo=smtp\\.yandex\\.ru\\)/iX'
local RECEIVED_smtp_yandex_ru_4 = 'Received=/from \\[\\d+\\.\\d+\\.\\d+\\.\\d+\\] \\(account \\S+ HELO smtp\\.yandex\\.ru\\)/iX'
local RECEIVED_smtp_yandex_ru_5 = 'Received=/from smtp\\.yandex\\.ru \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]\\)/iX'
local RECEIVED_smtp_yandex_ru_6 = 'Received=/from smtp\\.yandex\\.ru \\(\\S+ \\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]\\)/iX'
local RECEIVED_smtp_yandex_ru_7 = 'Received=/from \\S+ \\(HELO smtp\\.yandex\\.ru\\) \\(\\S+\\@\\d+\\.\\d+\\.\\d+\\.\\d+\\)/iX'
local RECEIVED_smtp_yandex_ru_8 = 'Received=/from \\S+ \\(HELO smtp\\.yandex\\.ru\\) \\(\\d+\\.\\d+\\.\\d+\\.\\d+\\)/iX'
local RECEIVED_smtp_yandex_ru_9 = 'Received=/from \\S+ \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\] helo=smtp\\.yandex\\.ru\\)/iX'

-- Combines "sender is mail.ru / gmail.com / ukr.net" with the smtp.yandex.ru
-- Received shapes above.
-- NOTE(review): the Received alternatives are not parenthesised as a group,
-- so the sender condition appears to be and-ed with the first Received
-- pattern only, while patterns 2..9 stand alone. Confirm against the
-- expression parser's operator precedence whether that is intended.
reconf.FAKE_RECEIVED_smtp_yandex_ru = string.format(
  '(((%s) & ((%s) | (%s))) | ((%s) & ((%s) | (%s))) | ((%s) & ((%s) | (%s)))) & (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s)',
  MAIL_RU_From, MAIL_RU_Return_Path, MAIL_RU_X_Envelope_From,
  GMAIL_COM_From, GMAIL_COM_Return_Path, GMAIL_COM_X_Envelope_From,
  UKR_NET_From, UKR_NET_Return_Path, UKR_NET_X_Envelope_From,
  RECEIVED_smtp_yandex_ru_1, RECEIVED_smtp_yandex_ru_2, RECEIVED_smtp_yandex_ru_3,
  RECEIVED_smtp_yandex_ru_4, RECEIVED_smtp_yandex_ru_5, RECEIVED_smtp_yandex_ru_6,
  RECEIVED_smtp_yandex_ru_7, RECEIVED_smtp_yandex_ru_8, RECEIVED_smtp_yandex_ru_9
)
-- Received header of the form "from [ip] by <host or ip>; <date>".
reconf.FORGED_GENERIC_RECEIVED = 'Received=/^\\s*(.+\\n)*from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by (([\\w\\d-]+\\.)+[a-zA-Z]{2,6}|\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}); \\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0/X'

-- Received header of the form "from [ip] by <host> id <12 chars>; <date>".
reconf.FORGED_GENERIC_RECEIVED2 = 'Received=/^\\s*(.+\\n)*from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by ([\\w\\d-]+\\.)+[a-z]{2,6} id [\\w\\d]{12}; \\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0/X'

-- Received header of the form
-- "by <ip> with SMTP id <14 letters>.<13 digits>; <date> (GMT)".
reconf.FORGED_GENERIC_RECEIVED3 = 'Received=/^\\s*(.+\\n)*by \\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3} with SMTP id [a-zA-Z]{14}\\.\\d{13};[\\r\\n\\s]*\\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0 \\(GMT\\)/X'

-- Received header of the form "from localhost by <host>; <date>".
reconf.FORGED_GENERIC_RECEIVED4 = 'Received=/^\\s*(.+\\n)*from localhost by \\S+;\\s+\\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0[\\s\\r\\n]*$/X'
-- Returns true when any single Received header value matches the pattern
-- "from [ip] ... from <same ip> by <host>; <date>", where the second
-- occurrence must repeat the bracketed address (back-reference \1).
-- Returns false when no Received header matches or none is present.
rspamd_config.FORGED_GENERIC_RECEIVED5 = function (task)
  local pattern = '^\\s*from \\[(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})\\].*\\n(.+\\n)*\\s*from \\1 by \\S+;\\s+\\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0$'
  local forged_re = rspamd_regexp.create_cached(pattern, 'i')
  local received = task:get_header_full('Received')

  if not received then
    return false
  end

  for _, hdr in ipairs(received) do
    if forged_re:match(hdr['value']) then
      return true
    end
  end

  return false
end
-- Received header ending in
-- "(Postfix) with ESMTP id <ID>[ for <addr>]; <Day>, <d> <Mon> <yyyy> <hh:mm:ss> <+-zzzz>"
-- with nothing after the time zone offset.
reconf.INVALID_POSTFIX_RECEIVED = 'Received=/ \\(Postfix\\) with ESMTP id [A-Z\\d]+([\\s\\r\\n]+for <\\S+?>)?;[\\s\\r\\n]*[A-Z][a-z]{2}, \\d{1,2} [A-Z][a-z]{2} \\d\\d\\d\\d \\d\\d:\\d\\d:\\d\\d [\\+\\-]\\d\\d\\d\\d$/X'
-- Pairs every To header with every Received header as "<to>|<received>" and
-- returns true on the first pair matching the pattern below. The pattern
-- captures the domain of the To address and requires the Received header to
-- read "from <ip> (HELO ...) by <that domain> with esmtp (...) id ...".
-- Returns false when To or Received headers are missing, when nothing
-- matches, or once more than five To headers have been examined.
rspamd_config.INVALID_EXIM_RECEIVED = function (task)
  local to_headers = task:get_header_full('To')
  if not to_headers then
    return false
  end

  local received = task:get_header_full('Received')
  local pattern = '^[^\\n]*?<?\\S+?\\@(\\S+)>?\\|.*from \\d+\\.\\d+\\.\\d+\\.\\d+ \\(HELO \\S+\\)[\\s\\r\\n]*by \\1 with esmtp \\(\\S*?[\\?\\@\\(\\)\\s\\.\\+\\*\'\'\\/\\\\,]\\S*\\)[\\s\\r\\n]+id \\S*?[\\)\\(<>\\/\\\\,\\-:=]'
  local exim_re = rspamd_regexp.create_cached(pattern, 's')
  if not received then
    return false
  end

  local examined = 0
  for _, to_hdr in ipairs(to_headers) do
    for _, rcvd_hdr in ipairs(received) do
      local candidate = to_hdr['value'] .. "|" .. rcvd_hdr['value']
      if exim_re:match(candidate) then
        return true
      end
    end
    examined = examined + 1
    -- Bound the work: give up as soon as the counter exceeds five.
    if examined > 5 then
      return false
    end
  end

  return false
end
-- Pairs every To header with every Received header as "<to>|<received>" and
-- returns true on the first pair matching the pattern below. The pattern
-- captures the domain of the To address and requires the Received header to
-- read "from <ip> (HELO ...) by <that domain> with esmtp (<9-12 capitals>
-- <5-6 capitals>) id xxxxxx-xxxxxx-xx".
-- Returns false when To or Received headers are missing, when nothing
-- matches, or once more than five To headers have been examined.
rspamd_config.INVALID_EXIM_RECEIVED2 = function (task)
  local to_headers = task:get_header_full('To')
  if not to_headers then
    return false
  end

  local received = task:get_header_full('Received')
  local pattern = '^[^\\n]*?<?\\S+?\\@(\\S+)>?\\|.*from \\d+\\.\\d+\\.\\d+\\.\\d+ \\(HELO \\S+\\)[\\s\\r\\n]*by \\1 with esmtp \\([A-Z]{9,12} [A-Z]{5,6}\\)[\\s\\r\\n]+id [a-zA-Z\\d]{6}-[a-zA-Z\\d]{6}-[a-zA-Z\\d]{2}[\\s\\r\\n]+'
  local exim_re = rspamd_regexp.create_cached(pattern, 's')
  if not received then
    return false
  end

  local examined = 0
  for _, to_hdr in ipairs(to_headers) do
    for _, rcvd_hdr in ipairs(received) do
      local candidate = to_hdr['value'] .. "|" .. rcvd_hdr['value']
      if exim_re:match(candidate) then
        return true
      end
    end
    examined = examined + 1
    -- Bound the work: give up as soon as the counter exceeds five.
    if examined > 5 then
      return false
    end
  end

  return false
end
@@ -0,0 +1,33 @@ | |||
-- Actually these regular expressions were obtained from SpamAssassin project, so they are licensed by apache license: | |||
-- | |||
-- Licensed to the Apache Software Foundation (ASF) under one or more | |||
-- contributor license agreements. See the NOTICE file distributed with | |||
-- this work for additional information regarding copyright ownership. | |||
-- The ASF licenses this file to you under the Apache License, Version 2.0 | |||
-- (the "License"); you may not use this file except in compliance with | |||
-- the License. You may obtain a copy of the License at: | |||
-- | |||
-- http://www.apache.org/licenses/LICENSE-2.0 | |||
-- | |||
-- Unless required by applicable law or agreed to in writing, software | |||
-- distributed under the License is distributed on an "AS IS" BASIS, | |||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
-- See the License for the specific language governing permissions and | |||
-- limitations under the License. | |||
-- | |||
-- Rules that are specific for lotto spam messages
local reconf = config.regexp

-- Header indicators
local r_lotto_from = 'From=/(?:lottery|News center|congratulation to you|NED INFO|BRITISH NATIONAL HEADQUATERS|MICROSOFT ON LINE SUPPORT TEAM|prize|online notification)/iH'
local r_lotto_subject = 'Subject=/(?:\\xA3\\d|pounds?|FINAL NOTIFICATION|FOR YOUR ATTENTION|File in Your Claims?|ATTN|prize|Claims requirement|amount|confirm|your e-mail address won|congratulations)/iH'

-- Body indicators
local r_lotto_body = '/(?:won|winning|\\xA3\\d|pounds?|GBP|LOTTERY|awards|prize)/isrP'
local kam_lotto1 = '/(e-?mail address (have emerged a winner|has won|attached to (ticket|reference)|was one of the ten winners)|random selection in our computerized email selection system)/isrP'
local kam_lotto2 = '/((ticket|serial|lucky) number|secret pin ?code|batch number|reference number|promotion date)/isrP'
local kam_lotto3 = '/(won|claim|cash prize|pounds? sterling)/isrP'
local kam_lotto4 = '/(claims (officer|agent)|lottery coordinator|fiduciary (officer|agent)|fiduaciary claims)/isrP'
local kam_lotto5 = '/(freelotto group|Royal Heritage Lottery|UK National (Online)? Lottery|U\\.?K\\.? Grand Promotions|Lottery Department UK|Euromillion Loteria|Luckyday International Lottery|International Lottery)/isrP'
local kam_lotto6 = '/(Dear Lucky Winner|Winning Notification|Attention:Winner|Dear Winner)/isrP'

-- NOTE(review): kam_lotto7 is defined but is not one of the terms of R_LOTTO
-- below; confirm whether it was meant to be included or can be dropped.
local kam_lotto7 = 'Subject=/(Your Lucky Day|(Attention:|ONLINE) WINNER)/iH'

-- Fires when one of R_UNDISC_RCPT / R_BAD_CTE_7BIT / R_NO_SPACE_IN_FROM holds
-- and at least three of the nine indicators passed below match.
reconf.R_LOTTO = string.format(
  '((%s) | (%s) | (%s)) & (((%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s)) >= 3)',
  reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'],
  r_lotto_from, r_lotto_subject, r_lotto_body,
  kam_lotto1, kam_lotto2, kam_lotto3, kam_lotto4, kam_lotto5, kam_lotto6
)
@@ -0,0 +1,144 @@ | |||
--[[ | |||
Copyright (c) 2011-2015, Vsevolod Stakhov <vsevolod@highsecure.ru> | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are met: | |||
1. Redistributions of source code must retain the above copyright notice, this | |||
list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright notice, | |||
this list of conditions and the following disclaimer in the documentation | |||
and/or other materials provided with the distribution. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
]]-- | |||
-- Detect language of message and selects appropriate statfiles for it | |||
-- Common labels for specific statfiles
local many_recipients_label = 'many recipients'
local undisclosed_recipients_label = 'undisclosed recipients'
local list_label = 'maillist'
local long_subject_label = 'long subject'

-- Get specific statfiles set based on message rules.
--
-- For each label that the classifier has statfiles for, the matching message
-- property is tested and, when it holds, those statfiles are collected:
--   * 'many recipients'        - more than 5 recipients
--   * 'undisclosed recipients' - recipient list present but empty
--   * 'maillist'               - a List-Unsubscribe header exists
--   * 'long subject'           - Subject longer than 150 bytes
--
-- classifier: object providing get_statfile_by_label(label)
-- task:       object providing get_recipients(kind) and get_header_raw(name)
--
-- Returns the collected statfiles as an array when more than one was
-- selected, otherwise nil.
local function get_specific_statfiles(classifier, task)
  -- Compatibility shim retained for code that still calls table.foreach on
  -- Lua versions where it is no longer provided.
  if not table.foreach then
    table.foreach = function(t, f)
      for k, v in pairs(t) do f(k, v) end
    end
  end

  local spec_st = {}

  -- Append every statfile of `statfiles` to the result.
  local function append_all(statfiles)
    for _, st in pairs(statfiles) do
      spec_st[#spec_st + 1] = st
    end
  end

  -- More than 5 recipients
  local st_many = classifier:get_statfile_by_label(many_recipients_label)
  if st_many then
    local rcpt = task:get_recipients(2)
    if rcpt and #rcpt > 5 then
      append_all(st_many)
    end
  end

  -- Undisclosed
  local st_undisc = classifier:get_statfile_by_label(undisclosed_recipients_label)
  if st_undisc then
    local rcpt = task:get_recipients(2)
    if rcpt and #rcpt == 0 then
      append_all(st_undisc)
    end
  end

  -- Maillist
  local st_maillist = classifier:get_statfile_by_label(list_label)
  if st_maillist then
    if task:get_header_raw('List-Unsubscribe') then
      append_all(st_maillist)
    end
  end

  -- Long subject
  local st_longsubj = classifier:get_statfile_by_label(long_subject_label)
  if st_longsubj then
    local subj = task:get_header_raw('Subject')
    if subj and #subj > 150 then
      append_all(st_longsubj)
    end
  end

  -- A single statfile is not returned; at least two are required.
  if #spec_st > 1 then
    return spec_st
  end
  return nil
end
-- Statfile selection callback for the 'bayes' classifier.
--
-- classifier: object providing get_statfiles() (and whatever
--             get_specific_statfiles needs)
-- task:       message task providing get_text_parts()
-- is_learn:   when true and label-specific statfiles apply, only those are
--             returned
-- is_spam:    accepted for interface compatibility; not used here
--
-- Selection order:
--   1. label-specific statfiles from get_specific_statfiles();
--   2. unlabeled statfiles whose 'language' parameter equals the detected
--      message language;
--   3. unlabeled statfiles that have no 'language' parameter.
-- After steps 2 and 3 the accumulated list is returned as soon as it holds
-- more than one statfile; otherwise nil is returned.
classifiers['bayes'] = function(classifier, task, is_learn, is_spam)
  -- Subfunction for detection of message's language: the first language
  -- reported by any text part, or nil.
  local detect_language = function(task)
    local parts = task:get_text_parts()
    if not parts then
      -- Defensive: no text parts available, so no language either.
      return nil
    end
    for _, p in ipairs(parts) do
      local l = p:get_language()
      if l then
        return l
      end
    end
    return nil
  end

  -- Main procedure
  local selected = {}
  local spec_st = get_specific_statfiles(classifier, task)
  if spec_st then
    if is_learn then
      return spec_st
    end
    -- Merge tables
    for _, st in pairs(spec_st) do
      selected[#selected + 1] = st
    end
  end

  -- Detect statfile by language (kept local so it does not leak into _G)
  local language = detect_language(task)
  if language then
    -- Find statfiles with specified language
    for _, st in ipairs(classifier:get_statfiles()) do
      -- Skip labeled statfiles
      if not st:get_label() then
        local st_l = st:get_param('language')
        if st_l and st_l == language then
          -- Insert statfile with specified language
          selected[#selected + 1] = st
        end
      end
    end
    if #selected > 1 then
      return selected
    end
  end

  -- Language not detected or specific language statfiles have not been found
  for _, st in ipairs(classifier:get_statfiles()) do
    -- Skip labeled statfiles
    if not st:get_label() then
      -- Insert only statfiles without language
      if not st:get_param('language') then
        selected[#selected + 1] = st
      end
    end
  end
  if #selected > 1 then
    return selected
  end

  return nil
end
@@ -0,0 +1,120 @@ | |||
--[[ | |||
Copyright (c) 2011-2015, Vsevolod Stakhov <vsevolod@highsecure.ru> | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are met: | |||
1. Redistributions of source code must retain the above copyright notice, this | |||
list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright notice, | |||
this list of conditions and the following disclaimer in the documentation | |||
and/or other materials provided with the distribution. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
]]-- | |||
-- This is main lua config file for rspamd | |||
-- Rule table that the scripts loaded below fill in.
config['regexp'] = {}

-- Load the bundled rule scripts, in this order.
for _, rules_file in ipairs({
  'regexp/headers.lua',
  'regexp/lotto.lua',
  'regexp/fraud.lua',
  'regexp/drugs.lua',
  'html.lua',
}) do
  dofile(rules_file)
end

local reconf = config['regexp']
local util = require "rspamd_util"

-- Uncategorized rules

-- Local rules
-- White font colour specified while no BGCOLOR attribute is present.
local r_bgcolor = '/BGCOLOR=/iP'
local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iP'
reconf.R_WHITE_ON_WHITE = string.format('(!(%s) & (%s))', r_bgcolor, r_font_color)

-- URL pointing at a .swf file on an imgNNN.imageshack.us host.
reconf.R_FLASH_REDIR_IMGSHACK = '/^(?:http:\\/\\/)?img\\d{1,5}\\.imageshack\\.us\\/\\S+\\.swf/U'
-- Different text parts
-- Reads the 'parts_distance' variable from the task memory pool. When it is
-- set and below 50, inserts the R_PARTS_DIFFER result itself with weight
-- 1 - tanh(distance / 100) and the distance (as "<n>%") as option.
-- The callback always returns false because the result is inserted manually.
rspamd_config.R_PARTS_DIFFER = function(task)
  local raw_distance = task:get_mempool():get_variable('parts_distance', 'int')
  if not raw_distance then
    return false
  end

  local nd = tonumber(raw_distance)
  if nd < 50 then
    local weight = 1 - util.tanh(nd / 100.0)
    task:insert_result('R_PARTS_DIFFER', weight, tostring(nd) .. '%')
  end

  return false
end
-- Date issues
-- True when the message has no Date header. The check is performed only when
-- the API version is 5 or newer; on older versions it is always false.
rspamd_config.MISSING_DATE = function(task)
  if rspamd_config:get_api_version() < 5 then
    return false
  end
  if task:get_header_raw('Date') then
    return false
  end
  return true
end
-- True when the message date lies more than two hours after the connection
-- time. The check is performed only when the API version is 5 or newer.
-- A message date of 0 (or less) is ignored.
rspamd_config.DATE_IN_FUTURE = function(task)
  if rspamd_config:get_api_version() >= 5 then
    -- Request both timestamps with gmt = true, exactly as DATE_IN_PAST does,
    -- so that the two values being subtracted share the same time base.
    -- NOTE(review): without gmt the values are presumably expressed in
    -- different local time zones - confirm against the task:get_date docs.
    local dm = task:get_date{format = 'message', gmt = true}
    local dt = task:get_date{format = 'connect', gmt = true}
    -- Two hours
    if dm > 0 and dm - dt > 7200 then
      return true
    end
  end
  return false
end
-- True when the message date lies more than one day before the connection
-- time (both requested with gmt = true). The check is performed only when
-- the API version is 5 or newer. A message date of 0 (or less) is ignored.
rspamd_config.DATE_IN_PAST = function(task)
  if rspamd_config:get_api_version() < 5 then
    return false
  end

  local message_ts = task:get_date{format = 'message', gmt = true}
  local connect_ts = task:get_date{format = 'connect', gmt = true}
  local one_day = 86400

  if message_ts > 0 and connect_ts - message_ts > one_day then
    return true
  end
  return false
end
-- Returns true when `filename` can be opened with io.open's default mode,
-- false otherwise. The handle is closed again before returning.
local function file_exists(filename)
  local handle = io.open(filename)
  if not handle then
    return false
  end
  handle:close()
  return true
end
-- Optional scripts: each one is executed only if it can be opened, in the
-- order listed.
for _, optional_file in ipairs({
  'hfilter.lua',
  'rspamd.local.lua',
  'rspamd.classifiers.lua',
}) do
  if file_exists(optional_file) then
    dofile(optional_file)
  end
end