Преглед на файлове

Move lua rules to rules dir.

tags/1.0.0
Vsevolod Stakhov преди 8 години
родител
ревизия
4135a3b422
променени са 8 файла, в които са добавени 1487 реда и са изтрити 0 реда
  1. 466
    0
      rules/hfilter.lua
  2. 76
    0
      rules/html.lua
  3. 83
    0
      rules/regexp/drugs.lua
  4. 74
    0
      rules/regexp/fraud.lua
  5. 491
    0
      rules/regexp/headers.lua
  6. 33
    0
      rules/regexp/lotto.lua
  7. 144
    0
      rules/rspamd.classifiers.lua
  8. 120
    0
      rules/rspamd.lua

+ 466
- 0
rules/hfilter.lua Целия файл

@@ -0,0 +1,466 @@
--[[
Copyright (c) 2011-2015, Vsevolod Stakhov <vsevolod@highsecure.ru>
Copyright (c) 2013-2015, Alexey Savelyev <info@homeweb.ru>
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
]]--


-- Weight for checks_hellohost and checks_hello: 5 - very hard, 4 - hard, 3 - meduim, 2 - low, 1 - very low.
-- From HFILTER_HELO_* and HFILTER_HOSTNAME_* symbols the maximum weight is selected in case of their actuating.


--local dumper = require 'pl.pretty'.dump
local rspamd_regexp = require "rspamd_regexp"

local checks_hellohost = {
['[.-]gprs[.-]'] = 5, ['gprs[.-][0-9]'] = 5, ['[0-9][.-]?gprs'] = 5,
['[.-]cdma[.-]'] = 5, ['cdma[.-][0-9]'] = 5, ['[0-9][.-]?cdma'] = 5,
['[.-]homeuser[.-]'] = 5, ['homeuser[.-][0-9]'] = 5, ['[0-9][.-]?homeuser'] = 5,
['[.-]dhcp[.-]'] = 5, ['dhcp[.-][0-9]'] = 5, ['[0-9][.-]?dhcp'] = 5,
['[.-]catv[.-]'] = 5, ['catv[.-][0-9]'] = 5, ['[0-9][.-]?catv'] = 5,
['[.-]wifi[.-]'] = 5, ['wifi[.-][0-9]'] = 5, ['[0-9][.-]?wifi'] = 5,
['[.-]dial-?up[.-]'] = 5, ['dial-?up[.-][0-9]'] = 5, ['[0-9][.-]?dial-?up'] = 5,
['[.-]dynamic[.-]'] = 5, ['dynamic[.-][0-9]'] = 5, ['[0-9][.-]?dynamic'] = 5,
['[.-]dyn[.-]'] = 5, ['dyn[.-][0-9]'] = 5, ['[0-9][.-]?dyn'] = 5,
['[.-]clients?[.-]'] = 5, ['clients?[.-][0-9]'] = 5, ['[0-9][.-]?clients?'] = 5,
['[.-]dynip[.-]'] = 5, ['dynip[.-][0-9]'] = 5, ['[0-9][.-]?dynip'] = 5,
['[.-]broadband[.-]'] = 5, ['broadband[.-][0-9]'] = 5, ['[0-9][.-]?broadband'] = 5,
['[.-]broad[.-]'] = 5, ['broad[.-][0-9]'] = 5, ['[0-9][.-]?broad'] = 5,
['[.-]bredband[.-]'] = 5, ['bredband[.-][0-9]'] = 5, ['[0-9][.-]?bredband'] = 5,
['[.-]nat[.-]'] = 5, ['nat[.-][0-9]'] = 5, ['[0-9][.-]?nat'] = 5,
['[.-]pptp[.-]'] = 5, ['pptp[.-][0-9]'] = 5, ['[0-9][.-]?pptp'] = 5,
['[.-]pppoe[.-]'] = 5, ['pppoe[.-][0-9]'] = 5, ['[0-9][.-]?pppoe'] = 5,
['[.-]ppp[.-]'] = 5, ['ppp[.-][0-9]'] = 5, ['[0-9][.-]?ppp'] = 5,
['[.-]modem[.-]'] = 5, ['modem[.-][0-9]'] = 5, ['[0-9][.-]?modem'] = 5,
['[.-]cablemodem[.-]'] = 5, ['cablemodem[.-][0-9]'] = 5, ['[0-9][.-]?cablemodem'] = 5,
['[.-]comcast[.-]'] = 5, ['comcast[.-][0-9]'] = 5, ['[0-9][.-]?comcast'] = 5,
['[.-][a|x]?dsl-dynamic[.-]'] = 5, ['[a|x]?dsl-dynamic[.-]?[0-9]'] = 5, ['[0-9][.-]?[a|x]?dsl-dynamic'] = 5,
['[.-][a|x]?dsl[.-]'] = 4, ['[a|x]?dsl[.-]?[0-9]'] = 4, ['[0-9][.-]?[a|x]?dsl'] = 4,
['[.-][a|x]?dsl-line[.-]'] = 4, ['[a|x]?dsl-line[.-]?[0-9]'] = 4, ['[0-9][.-]?[a|x]?dsl-line'] = 4,
['[.-]in-?addr[.-]'] = 4, ['in-?addr[.-][0-9]'] = 4, ['[0-9][.-]?in-?addr'] = 4,
['[.-]pool[.-]'] = 4, ['pool[.-][0-9]'] = 4, ['[0-9][.-]?pool'] = 4,
['[.-]fibertel[.-]'] = 4, ['fibertel[.-][0-9]'] = 4, ['[0-9][.-]?fibertel'] = 4,
['[.-]fbx[.-]'] = 4, ['fbx[.-][0-9]'] = 4, ['[0-9][.-]?fbx'] = 4,
['[.-]unused-addr[.-]'] = 3, ['unused-addr[.-][0-9]'] = 3, ['[0-9][.-]?unused-addr'] = 3,
['[.-]cable[.-]'] = 3, ['cable[.-][0-9]'] = 3, ['[0-9][.-]?cable'] = 3,
['[.-]kabel[.-]'] = 3, ['kabel[.-][0-9]'] = 3, ['[0-9][.-]?kabel'] = 3,
['[.-]host[.-]'] = 2, ['host[.-][0-9]'] = 2, ['[0-9][.-]?host'] = 2,
['[.-]customers?[.-]'] = 1, ['customers?[.-][0-9]'] = 1, ['[0-9][.-]?customers?'] = 1,
['[.-]user[.-]'] = 1, ['user[.-][0-9]'] = 1, ['[0-9][.-]?user'] = 1,
['[.-]peer[.-]'] = 1, ['peer[.-][0-9]'] = 1, ['[0-9][.-]?peer'] = 1
}

local checks_hello = {
['^[^\\.]+$'] = 5, -- for helo=COMPUTER, ANNA, etc... Without dot in helo
['localhost$'] = 5,
['^(dsl)?(device|speedtouch)\\.lan$'] = 5,
['\\.(lan|local|home|localdomain|intra|in-addr.arpa|priv|online|user|veloxzon)$'] = 5
}

local checks_hello_badip = {
['^0\\.'] = 5, ['^::1$'] = 5, --loopback ipv4, ipv6
['^127\\.'] = 5, ['^10\\.'] = 5, ['^192\\.168\\.'] = 5, --local ipv4
['^172\\.1[6-9]\\.'] = 5, ['^172\\.2[0-9]\\.'] = 5, ['^172\\.3[01]\\.'] = 5, --local ipv4
['^169\\.254\\.'] = 5, --chanel ipv4
['^192\\.0\\.0\\.'] = 5, --IETF Protocol
['^192\\.88\\.99\\.'] = 5, --RFC3068
['^100.6[4-9]\\.'] = 5, ['^100.[7-9]\\d\\.'] = 5, ['^100.1[01]\\d\\.'] = 5, ['^100.12[0-7]\\d\\.'] = 5, --RFC6598
['^\\d\\.\\d\\.\\d\\.255$'] = 5, --multicast ipv4
['^192\\.0\\.2\\.'] = 5, ['^198\\.51\\.100\\.'] = 5, ['^203\\.0\\.113\\.'] = 5, --sample
['^fe[89ab][0-9a-f]::'] = 5, ['^fe[cdf][0-9a-f]:'] = 5, --local ipv6 (fe80:: - febf::, fec0:: - feff::)
['^2001:db8::'] = 5, --reserved RFC 3849 for ipv6
['^fc00::'] = 5, ['^ffxx::'] = 5 --unicast, multicast ipv6
}

local checks_hello_bareip = {
'^\\d+[x.-]\\d+[x.-]\\d+[x.-]\\d+$', --bareip ipv4,
'^[0-9a-f]+:' --bareip ipv6
}

local config = {
['helo_enabled'] = false,
['hostname_enabled'] = false,
['from_enabled'] = false,
['rcpt_enabled'] = false,
['mid_enabled'] = false,
['url_enabled'] = false
}

local function check_regexp(str, regexp_text)
local re = rspamd_regexp.create_cached(regexp_text, 'i')
if re:match(str) then return true end
return false
end

local function split(str, delim, maxNb)
-- Eliminate bad cases...
if string.find(str, delim) == nil then
return { str }
end
if maxNb == nil or maxNb < 1 then
maxNb = 0 -- No limit
end
local result = {}
local pat = "(.-)" .. delim .. "()"
local nb = 0
local lastPos
for part, pos in string.gmatch(str, pat) do
nb = nb + 1
result[nb] = part
lastPos = pos
if nb == maxNb then break end
end
-- Handle the last field
if nb ~= maxNb then
result[nb + 1] = string.sub(str, lastPos)
end
return result
end

local function check_fqdn(domain)
if check_regexp(domain, '(?=^.{4,253}$)(^((?!-)[a-zA-Z0-9-]{1,63}(?<!-)\\.)+[a-zA-Z0-9-]{2,63}\\.?$)') then
return true
end
return false
end

-- host: host for check
-- symbol_suffix: suffix for symbol
-- eq_ip: ip for comparing or empty string
-- eq_host: host for comparing or empty string
local function check_host(task, host, symbol_suffix, eq_ip, eq_host)

local function check_host_cb_mx_a(resolver, to_resolve, results, err)
task:inc_dns_req()
if not results then
task:insert_result('HFILTER_' .. symbol_suffix .. '_NORESOLVE_MX', 1.0)
end
end
local function check_host_cb_mx(resolver, to_resolve, results, err)
task:inc_dns_req()
if not results then
task:insert_result('HFILTER_' .. symbol_suffix .. '_NORES_A_OR_MX', 1.0)
else
for _,mx in pairs(results) do
if mx['name'] then
task:get_resolver():resolve_a(task:get_session(), task:get_mempool(), mx['name'], check_host_cb_mx_a)
end
end
end
end
local function check_host_cb_a(resolver, to_resolve, results, err)
task:inc_dns_req()

if not results then
task:get_resolver():resolve_mx(task:get_session(), task:get_mempool(), host, check_host_cb_mx)
elseif eq_ip ~= '' then
for _,result in pairs(results) do
if result:to_string() == eq_ip then
return true
end
end
task:insert_result('HFILTER_' .. symbol_suffix .. '_IP_A', 1.0)
end
end

if host then
host = string.lower(host)
else
return false
end
if eq_host then
eq_host = string.lower(eq_host)
else
eq_host = ''
end

if check_fqdn(host) then
if eq_host == '' or eq_host ~= 'unknown' or eq_host ~= host then
task:get_resolver():resolve_a(task:get_session(), task:get_mempool(), host, check_host_cb_a)
end
else
task:insert_result('HFILTER_' .. symbol_suffix .. '_NOT_FQDN', 1.0)
end

return true
end

--
local function hfilter(task)
-- Links checks
if config['url_enabled'] then
local parts = task:get_text_parts()
if parts then
--One text part--
local total_parts_len = 0
local text_parts_count = 0
local selected_text_part = nil
for _,p in ipairs(parts) do
total_parts_len = total_parts_len + p:get_length()

if not p:is_html() then
text_parts_count = text_parts_count + 1
selected_text_part = p
end
end
if total_parts_len > 0 then
local urls = task:get_urls()
if urls then
local total_url_len = 0
for _,url in ipairs(urls) do
total_url_len = total_url_len + url:get_length()
end
if total_url_len > 0 then
if total_url_len + 7 > total_parts_len then
task:insert_result('HFILTER_URL_ONLY', 1.00)
elseif text_parts_count == 1 and selected_text_part and selected_text_part:get_length() < 1024 then
if selected_text_part:get_lines_count() < 2 then
task:insert_result('HFILTER_URL_ONELINE', 1.00)
end
end
end
end
end
end
end
--No more checks for auth user
if task:get_user() ~= nil then
return false
end
--local message = task:get_message()
local ip = false
local rip = task:get_from_ip()
if rip and rip:is_valid() then
ip = rip:to_string()
end
-- Check's HELO
local weight_helo = 0
if config['helo_enabled'] then
local helo = task:get_helo()
if helo then
helo = string.gsub(helo, '[%[%]]', '')
-- Regexp check HELO (checks_hello_badip)
local find_badip = false
for regexp,weight in pairs(checks_hello_badip) do
if check_regexp(helo, regexp) then
task:insert_result('HFILTER_HELO_BADIP', 1.0)
find_badip = true
break
end
end
-- Regexp check HELO (checks_hello_bareip)
local find_bareip = false
if not find_badip then
for _,regexp in pairs(checks_hello_bareip) do
if check_regexp(helo, regexp) then
task:insert_result('HFILTER_HELO_BAREIP', 1.0)
find_bareip = true
break
end
end
end
if not find_badip and not find_bareip then
-- Regexp check HELO (checks_hello)
for regexp,weight in pairs(checks_hello) do
if check_regexp(helo, regexp) then
weight_helo = weight
break
end
end
-- Regexp check HELO (checks_hellohost)
for regexp,weight in pairs(checks_hellohost) do
if check_regexp(helo, regexp) then
if weight > weight_helo then
weight_helo = weight
end
break
end
end
--FQDN check HELO
if ip and helo and weight_helo == 0 then
check_host(task, helo, 'HELO', ip, hostname)
end
end
else
task:insert_result('HFILTER_HELO_UNKNOWN', 1.0)
end
end
-- Check's HOSTNAME
local weight_hostname = 0
if config['hostname_enabled'] then
local hostname = task:get_hostname()
if hostname then
-- Check regexp HOSTNAME
if hostname == 'unknown' then
task:insert_result('HFILTER_HOSTNAME_UNKNOWN', 1.00)
else
for regexp,weight in pairs(checks_hellohost) do
if check_regexp(hostname, regexp) then
weight_hostname = weight
break
end
end
end
else
task:insert_result('HFILTER_HOSTNAME_UNKNOWN', 1.00)
end
end
--Insert weight's for HELO or HOSTNAME
if weight_helo > 0 and weight_helo >= weight_hostname then
task:insert_result('HFILTER_HELO_' .. weight_helo, 1.0)
elseif weight_hostname > 0 and weight_hostname > weight_helo then
task:insert_result('HFILTER_HOSTNAME_' .. weight_hostname, 1.0)
end
-- MAILFROM checks --
local frombounce = false
if config['from_enabled'] then
local from = task:get_from(1)
if from then
--FROM host check
for _,fr in ipairs(from) do
local fr_split = split(fr['addr'], '@', 0)
if table.maxn(fr_split) == 2 then
check_host(task, fr_split[2], 'FROMHOST', '', '')
if fr_split[1] == 'postmaster' then
frombounce = true
end
end
end
else
task:insert_result('HFILTER_FROM_BOUNCE', 1.00)
frombounce = true
end
end
-- Recipients checks --
if config['rcpt_enabled'] then
local rcpt = task:get_recipients()
if rcpt then
local count_rcpt = table.maxn(rcpt)
if frombounce then
if count_rcpt > 1 then
task:insert_result('HFILTER_RCPT_BOUNCEMOREONE', 1.00)
end
end
end
end

--Message ID host check
if config['mid_enabled'] then
local message_id = task:get_message_id()
if message_id then
local mid_split = split(message_id, '@', 0)
if table.maxn(mid_split) == 2 and not string.find(mid_split[2], 'local') then
check_host(task, mid_split[2], 'MID', '', '')
end
end
end
return false
end

local symbols_enabled = {}

local symbols_helo = {
"HFILTER_HELO_BAREIP",
"HFILTER_HELO_BADIP",
"HFILTER_HELO_UNKNOWN",
"HFILTER_HELO_1",
"HFILTER_HELO_2",
"HFILTER_HELO_3",
"HFILTER_HELO_4",
"HFILTER_HELO_5",
"HFILTER_HELO_NORESOLVE_MX",
"HFILTER_HELO_NORES_A_OR_MX",
"HFILTER_HELO_IP_A",
"HFILTER_HELO_NOT_FQDN"
}
local symbols_hostname = {
"HFILTER_HOSTNAME_1",
"HFILTER_HOSTNAME_2",
"HFILTER_HOSTNAME_3",
"HFILTER_HOSTNAME_4",
"HFILTER_HOSTNAME_5",
"HFILTER_HOSTNAME_UNKNOWN"
}
local symbols_rcpt = {
"HFILTER_RCPT_BOUNCEMOREONE"
}
local symbols_mid = {
"HFILTER_MID_NORESOLVE_MX",
"HFILTER_MID_NORES_A_OR_MX",
"HFILTER_MID_NOT_FQDN"
}
local symbols_url = {
"HFILTER_URL_ONLY",
"HFILTER_URL_ONELINE"
}
local symbols_from = {
"HFILTER_FROMHOST_NORESOLVE_MX",
"HFILTER_FROMHOST_NORES_A_OR_MX",
"HFILTER_FROMHOST_NOT_FQDN",
"HFILTER_FROM_BOUNCE"
}

local opts = rspamd_config:get_all_opt('hfilter')
if opts then
for k,v in pairs(opts) do
config[k] = v
end
end

local function append_t(t, a)
for _,v in ipairs(a) do table.insert(t, v) end
end
if config['helo_enabled'] then
append_t(symbols_enabled, symbols_helo)
end
if config['hostname_enabled'] then
append_t(symbols_enabled, symbols_hostname)
end
if config['from_enabled'] then
append_t(symbols_enabled, symbols_from)
end
if config['rcpt_enabled'] then
append_t(symbols_enabled, symbols_rcpt)
end
if config['mid_enabled'] then
append_t(symbols_enabled, symbols_mid)
end
if config['url_enabled'] then
append_t(symbols_enabled, symbols_url)
end

--dumper(symbols_enabled)
if table.maxn(symbols_enabled) > 0 then
rspamd_config:register_symbols(hfilter, 1.0, "HFILTER", symbols_enabled);
end

+ 76
- 0
rules/html.lua Целия файл

@@ -0,0 +1,76 @@
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to you under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at:
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

local reconf = config['regexp']
local rspamd_regexp = require "rspamd_regexp"
local rspamd_logger = require "rspamd_logger"

-- Messages that have only HTML part
reconf['MIME_HTML_ONLY'] = 'has_only_html_part()'

local function check_html_image(task, min, max)
local tp = task:get_text_parts()
for _,p in ipairs(tp) do
if p:is_html() then
local hc = p:get_html()
local len = p:get_raw_length()
if len >= min and len < max then
local images = hc:get_images()
if images then
for _,i in ipairs(images) do
if i['embedded'] then
return true
end
end
end
end
end
end
end

rspamd_config.HTML_SHORT_LINK_IMG_1 = function(task)
return check_html_image(task, 0, 1024)
end
rspamd_config.HTML_SHORT_LINK_IMG_2 = function(task)
return check_html_image(task, 1024, 1536)
end
rspamd_config.HTML_SHORT_LINK_IMG_3 = function(task)
return check_html_image(task, 1536, 2048)
end
rspamd_config.R_EMPTY_IMAGE = function(task)
local tp = task:get_text_parts() -- get text parts in a message
for _,p in ipairs(tp) do -- iterate over text parts array using `ipairs`
if p:is_html() then -- if the current part is html part
local hc = p:get_html() -- we get HTML context
local len = p:get_length() -- and part's length
if len < 50 then -- if we have a part that has less than 50 bytes of text
local images = hc:get_images() -- then we check for HTML images
if images then -- if there are images
for _,i in ipairs(images) do -- then iterate over images in the part
if i['height'] + i['width'] >= 400 then -- if we have a large image
return true -- add symbol
end
end
end
end
end
end
end

+ 83
- 0
rules/regexp/drugs.lua Целия файл

@@ -0,0 +1,83 @@
-- Actually these regular expressions were obtained from SpamAssassin project, so they are licensed by apache license:
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to you under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at:
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Drugs spam (viagra, pills etc)
-- XXX: remove this legacy to statfile


local reconf = config['regexp']

local drugs_diet1 = '/(?:\\b|\\s)[_\\W]{0,3}p[_\\W]{0,3}h[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}n[_\\W]{0,3}t[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}r[_\\W]{0,3}m[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}n[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_diet2 = '/(?:\\b|\\s)_{0,3}[i1!|l\\xEC-\\xEF][_\\W]?o[_\\W]?n[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?n_{0,3}\\b/irP'
local drugs_diet3 = '/\\bbontril\\b/irP'
local drugs_diet4 = '/\\bphendimetrazine\\b/irP'
local drugs_diet5 = '/\\bdiethylpropion\\b/irP'
local drugs_diet6 = '/(?:\\b|\\s)[_\\W]{0,3}M[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}r[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}d[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_diet7 = '/\\b_{0,3}t[_\\W]?[e3\\xE8-\\xEB][_\\W]?n[_\\W]?u[_\\W]?a[_\\W]?t[_\\W]?[e3\\xE8-\\xEB]_{0,3}(?:\\b|\\s)/irP'
local drugs_diet8 = '/\\b_{0,3}d[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?d[_\\W]?r[_\\W][e3\\xE8-\\xEB[_\\W]?xx?_{0,3}\\b/irP'
local drugs_diet9 = '/\\b_{0,3}a[_\\W]?d[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?p[_\\W]?[e3\\xE8-\\xEB][_\\W]?x_{0,3}\\b/irP'
local drugs_diet10 = '/\\b_{0,3}x?x[_\\W]?[e3\\xE8-\\xEB][_\\W]?n[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?c[_\\W]?[a4\\xE0-\\xE6@][_\\W]?l_{0,3}\\b/irP'
reconf['DRUGS_DIET'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_diet1, drugs_diet2, drugs_diet3, drugs_diet4, drugs_diet5, drugs_diet6, drugs_diet7, drugs_diet8, drugs_diet9, drugs_diet10)
local drugs_erectile1 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[ij1!|l\\xEC\\xED\\xEE\\xEF][_\\W]{0,3}[a40\\xE0-\\xE6@][_\\W]{0,3}[xyz]?[gj][_\\W]{0,3}r[_\\W]{0,3}[a40\\xE0-\\xE6@][_\\W]{0,3}x?[_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_erectile2 = '/\\bV(?:agira|igara|iaggra|iaegra)\\b/irP'
local drugs_erectile3 = '/(?:\\A|[\\s\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\x7f])[_\\W]{0,3}C[_\\W]{0,3}[ij1!|l\\xEC\\xED\\xEE\\xEF][_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}l?[l!|1][_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}s[_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_erectile4 = '/\\bC(?:alis|ilias|ilais)\\b/irP'
local drugs_erectile5 = '/\\b_{0,3}s[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?l[_\\W]?d[_\\W]?[e3\\xE8-\\xEB][_\\W]?n[_\\W]?[a4\\xE0-\\xE6@][_\\W]?f[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?l c[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?t[_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?t[_\\W]?[e3\\xE8-\\xEB]_{0,3}(?:\\b|\\s)/irP'
local drugs_erectile6 = '/\\b_{0,3}L[_\\W]?[e3\\xE8-\\xEB][_\\W]?(?:\\\\\\/|V)[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?t[_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?(?:\\b|\\s)/irP'
local drugs_erectile8 = '/\\b_{0,3}T[_\\W]?[a4\\xE0-\\xE6@][_\\W]?d[_\\W]?[a4\\xE0-\\xE6@][_\\W]?l[_\\W]?[a4\\xE0-\\xE6@][_\\W]?f[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?l_{0,3}\\b/irP'
local drugs_erectile10 = '/\\b_{0,3}V[_\\W]?(?:i|\\&iuml\\;)[_\\W]?(?:a|\\&agrave|\\&aring)\\;?[_\\W]?g[_\\W]?r[_\\W]?(?:a|\\&agrave|\\&aring)\\b/irP'
local drugs_erectile11 = '/(?:\\b|\\s)_{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}p[_\\W]{0,3}c[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}[l!|1][_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}s_{0,3}\\b/irP'
reconf['DRUGS_ERECTILE'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_erectile1, drugs_erectile2, drugs_erectile3, drugs_erectile4, drugs_erectile5, drugs_erectile6, drugs_erectile8, drugs_erectile10, drugs_erectile11)
local drugs_anxiety1 = '/(?:\\b|\\s)[_\\W]{0,3}x?x[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}n[_\\W]{0,3}[ea4\\xE1\\xE2\\xE3@][_\\W]{0,3}xx?_{0,3}\\b/irP'
local drugs_anxiety2 = '/\\bAlprazolam\\b/irP'
local drugs_anxiety3 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}[l|][_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[u\\xB5\\xF9-\\xFC][_\\W]{0,3}m\\b/irP'
local drugs_anxiety4 = '/\\b_{0,3}D[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?[a4\\xE0-\\xE6@][_\\W]?z[_\\W]?[ea3\\xE9\\xEA\\xEB][_\\W]?p[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m_{0,3}\\b/irP'
local drugs_anxiety5 = '/(?:\\b|\\s)[a4\\xE0-\\xE6@][_\\W]?t[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?v[_\\W]?[a4\\xE0-\\xE6@][_\\W]?n_{0,3}\\b/irP'
local drugs_anxiety6 = '/\\b_{0,3}l[_\\W]?[o0\\xF2-\\xF6][_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?z[_\\W]?[e3\\xE8-\\xEB][_\\W]?p[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m_{0,3}\\b/irP'
local drugs_anxiety7 = '/\\b_{0,3}c[_\\W]?l[_\\W]?[o0\\xF2-\\xF6][_\\W]?n[_\\W]?[a4\\xE0-\\xE6@][_\\W]?z[_\\W]?e[_\\W]?p[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m\\b/irP'
local drugs_anxiety8 = '/\\bklonopin\\b/irP'
local drugs_anxiety9 = '/\\brivotril\\b/irP'
reconf['DRUGS_ANXIETY'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_anxiety1, drugs_anxiety2, drugs_anxiety3, drugs_anxiety4, drugs_anxiety5, drugs_anxiety6, drugs_anxiety7, drugs_anxiety8, drugs_anxiety9)
reconf['DRUGS_ANXIETY_EREC'] = string.format('(%s) & (%s)', reconf['DRUGS_ERECTILE'], reconf['DRUGS_ANXIETY'])
local drugs_pain1 = '/\\b_{0,3}h[_\\W]?y[_\\W]?d[_\\W]?r[_\\W]?[o0\\xF2-\\xF6][_\\W]?c[_\\W]?[o0\\xF2-\\xF6][_\\W]?d[_\\W]?[o0\\xF2-\\xF6][_\\W]?n[_\\W]?e_{0,3}\\b/irP'
local drugs_pain2 = '/\\b_{0,3}c[o0\\xF2-\\xF6]deine_{0,3}\\b/irP'
local drugs_pain3 = '/(?:\\b|\\s)[_\\W]{0,3}[u\\xB5\\xF9-\\xFC][_\\W]{0,3}l[_\\W]{0,3}t[_\\W]{0,3}r[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}m_{0,3}\\b/irP'
local drugs_pain4 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}c[_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}d[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}ns?[_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_pain5 = '/\\b_{0,3}t[_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m[_\\W]?[a4\\xE0-\\xE6@][_\\W]?d[_\\W]?[o0\\xF2-\\xF6][_\\W]?[l!|1]_{0,3}\\b/irP'
local drugs_pain6 = '/\\b_{0,3}u[_\\W]?l[_\\W]?t[_\\W]?r[_\\W]?a[_\\W]?c[_\\W]?e[_\\W]?t_{0,3}\\b/irP'
local drugs_pain7 = '/\\b_{0,3}f[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?[o0\\xF2-\\xF6][_\\W]?r[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?c[_\\W]?[e3\\xE8-\\xEB][_\\W]?[t7]_{0,3}\\b/irP'
local drugs_pain8 = '/\\b_{0,3}c[_\\W]?[e3\\xE8-\\xEB][_\\W]?l[_\\W]?[e3\\xE8-\\xEB][_\\W]?b[_\\W]?r[_\\W]?[e3\\xE8-\\xEB][_\\W]?x_{0,3}\\b/irP'
local drugs_pain9 = '/(?:\\b|\\s)_{0,3}[i1!|l\\xEC-\\xEF]m[i1!|l\\xEC-\\xEF]tr[e3\\xE8-\\xEB]x_{0,3}\\b/irP'
local drugs_pain10 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}x[_\\W]{0,3}xx?_{0,3}\\b/irP'
local drugs_pain11 = '/\\bzebutal\\b/irP'
local drugs_pain12 = '/\\besgic plus\\b/irP'
local drugs_pain13 = '/\\bD[_\\W]?[a4\\xE0-\\xE6@][_\\W]?r[_\\W]?v[_\\W]?[o0\\xF2-\\xF6][_\\W]?n\\b/irP'
local drugs_pain14 = '/N[o0\\xF2-\\xF6]rc[o0\\xF2-\\xF6]/irP'
local drugs_pain = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) || (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_pain1, drugs_pain2, drugs_pain3, drugs_pain4, drugs_pain5, drugs_pain6, drugs_pain7, drugs_pain8, drugs_pain9, drugs_pain10, drugs_pain11, drugs_pain12, drugs_pain13, drugs_pain14)
local drugs_sleep1 = '/(?:\\b|\\s)[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}m[_\\W]{0,3}b[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}n[_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_sleep2 = '/(?:\\b|\\s)[_\\W]{0,3}S[_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}n[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}t[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_sleep3 = '/\\b_{0,3}R[_\\W]?[e3\\xE8-\\xEB][_\\W]?s[_\\W]?t[_\\W]?[o0\\xF2-\\xF6][_\\W]?r[_\\W]?i[_\\W]?l_{0,3}\\b/irP'
local drugs_sleep4 = '/\\b_{0,3}H[_\\W]?[a4\\xE0-\\xE6@][_\\W]?l[_\\W]?c[_\\W]?i[_\\W]?[o0\\xF2-\\xF6][_\\W]?n_{0,3}\\b/irP'
local drugs_sleep = string.format('(%s) | (%s) | (%s) | (%s)', drugs_sleep1, drugs_sleep2, drugs_sleep3, drugs_sleep4)
local drugs_muscle1 = '/(?:\\b|\\s)[_\\W]{0,3}s[_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}m[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}(?:\\b|\\s)/irP'
local drugs_muscle2 = '/\\b_{0,3}cycl[o0\\xF2-\\xF6]b[e3\\xE8-\\xEB]nz[a4\\xE0-\\xE6@]pr[i1!|l\\xEC-\\xEF]n[e3\\xE8-\\xEB]_{0,3}(?:\\b|\\s)/irP'
local drugs_muscle3 = '/\\b_{0,3}f[_\\W]?l[_\\W]?[e3\\xE8-\\xEB][_\\W]?x[_\\W]?[e3\\xE8-\\xEB][_\\W]?r[_\\W]?[i1!|l\\xEC-\\xEF]_{0,3}[_\\W]?l_{0,3}\\b/irP'
local drugs_muscle4 = '/\\b_{0,3}z[_\\W]?a[_\\W]?n[_\\W]?a[_\\W]?f[_\\W]?l[_\\W]?e[_\\W]?x_{0,3}\\b/irP'
local drugs_muscle5 = '/\\bskelaxin\\b/irP'
reconf['DRUGS_MUSCLE'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_muscle1, drugs_muscle2, drugs_muscle3, drugs_muscle4, drugs_muscle5)
reconf['DRUGS_MANYKINDS'] = string.format('((%s) | (%s) | (%s)) & ((%s) + (%s) + (%s) + (%s) + (%s) + (%s) >= 3)', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], reconf['DRUGS_ERECTILE'], reconf['DRUGS_DIET'], drugs_pain, drugs_sleep, reconf['DRUGS_MUSCLE'], reconf['DRUGS_ANXIETY'])


+ 74
- 0
rules/regexp/fraud.lua Целия файл

@@ -0,0 +1,74 @@
-- Actually these regular expressions were obtained from SpamAssassin project, so they are licensed by apache license:
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to you under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at:
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Fraud messages (Nigeria spam, viagra, etc)
local reconf = config['regexp']

local fraud_dbi = '/(?:\\bdollars?\\b|\\busd(?:ollars)?(?:[0-9]|\\b)|\\bus\\$|\\$[0-9,.]{6,}|\\$[0-9].{0,8}[mb]illion|\\$[0-9.,]{2,10} ?m|\\beuros?\\b|u[.]?s[.]? [0-9.]+ m)/irP'
local fraud_kjv = '/(?:claim|concerning) (?:the|this) money/irP'
local fraud_irj = '/(?:finance|holding|securit(?:ies|y)) (?:company|firm|storage house)/irP'
local fraud_neb = '/(?:government|bank) of nigeria/irP'
local fraud_xjr = '/(?:who was a|as a|an? honest|you being a|to any) foreigner/irP'
local fraud_dpr = '/\\b(?:(?:respond|reply) (?:urgently|immediately)|(?:urgent|immediate|earliest) (?:reply|response))\\b/irP'
local fraud_pts = '/\\b(?:ass?ass?inat(?:ed|ion)|murder(?:e?d)?|kill(?:ed|ing)\\b[^.]{0,99}\\b(?:war veterans|rebels?))\\b/irP'
local fraud_bep = '/\\b(?:bank of nigeria|central bank of|trust bank|apex bank|amalgamated bank)\\b/irP'
local fraud_tdp = '/\\b(?:business partner(?:s|ship)?|silent partner(?:s|ship)?)\\b/irP'
local fraud_gan = '/\\b(?:charles taylor|serena|abacha|gu[eйи]i|sese[- ]?seko|kabila)\\b/irP'
local fraud_irt = '/\\b(?:compliments? of the|dear friend|dear sir|yours faithfully|season\'?s greetings)\\b/irP'
local fraud_aon = '/\\b(?:confidential|private|alternate|alternative) (?:(?:e-? *)?mail)\\b/irP'
local fraud_wny = '/\\b(?:disburse?(?:ment)?|incurr?(?:ed)?|remunerr?at(?:ed?|ion)|remm?itt?(?:ed|ance|ing)?)\\b/irP'
local fraud_ipk = '/\\b(?:in|to|visit) your country\\b/irP'
local fraud_qxx = '/\\b(?:my name is|i am) (?:mrs?|engr|barrister|dr|prince(?:ss)?)[. ]/irP'
local fraud_iou = '/\\b(?:no risks?|risk-? *free|free of risks?|100% safe)\\b/irP'
local fraud_ezy = '/\\b(?:of|the) late president\\b/irP'
local fraud_mly = '/\\b(?:reply|respond)\\b[^.]{0,50}\\b(?:to|through)\\b[^.]{0,50}\\@\\b/irP'
local fraud_zfj = '/\\b(?:wife|son|brother|daughter) of the late\\b/irP'
local fraud_kdt = '/\\bU\\.?S\\.?(?:D\\.?)?\\s*(?:\\$\\s*)?(?:\\d+,\\d+,\\d+|\\d+\\.\\d+\\.\\d+|\\d+(?:\\.\\d+)?\\s*milli?on)/irP'
local fraud_ulk = '/\\baffidavits?\\b/irP'
local fraud_bgp = '/\\battached to ticket number\\b/irP'
local fraud_fbi = '/\\bdisburs/irP'
local fraud_jbu = '/\\bforeign account\\b/irP'
local fraud_yww = '/\\bfurnish you with\\b/irP'
local fraud_jyg = '/\\bgive\\s+you .{0,15}(?:fund|money|total|sum|contact|percent)\\b/irP'
local fraud_xvw = '/\\bhonest cooperation\\b/irP'
local fraud_uuy = '/\\blegitimate business(?:es)?\\b/irP'
local fraud_snt = '/\\blocate(?: .{1,20})? extended relative/irP'
local fraud_ltx = '/\\bmilli?on (?:.{1,25} thousand\\s*)?(?:(?:united states|u\\.?s\\.?) dollars|(?i:U\\.?S\\.?D?))\\b/irP'
local fraud_jnb = '/\\boperat(?:e|ing)\\b[^.]{0,99}\\b(?:for(?:ei|ie)gn|off-? ?shore|over-? ?seas?) (?:bank )?accounts?\\b/irP'
local fraud_qfy = '/\\bover-? *(?:invoiced?|cost(?:s|ing)?)\\b/irP'
local fraud_wdr = '/\\bprivate lawyer\\b/irP'
local fraud_wfc = '/\\bsecur(?:e|ing) (?:the )?(?:funds?|monies)\\b/irP'
local fraud_aum = '/\\bthe desk of\\b/irP'
local fraud_mcq = '/\\btransaction\\b.{1,30}\\b(?:magnitude|diplomatic|strict|absolute|secret|confiden(?:tial|ce)|guarantee)/irP'
local fraud_etx = '/\\byour\\b[^.]{0,99}\\b(?:contact (?:details|information)|private (?:e?[- ]?mail|telephone|tel|phone|fax))\\b/irP'
local fraud_pvn = '/as the beneficiary/irP'
local fraud_fvu = '/award notification/irP'
local fraud_ckf = '/computer ballot system/irP'
local fraud_fcw = '/fiduciary agent/irP'
local fraud_mqo = '/foreign (?:business partner|customer)/irP'
local fraud_tcc = '/foreign (?:offshore )?(?:bank|account)/irP'
local fraud_gbw = '/god gives .{1,10}second chance/irP'
local fraud_nrg = '/i am contacting you/irP'
local fraud_rlx = '/lott(?:o|ery) (?:co,?ordinator|international)/irP'
local fraud_axf = '/magnanimity/irP'
local fraud_thj = '/modalit(?:y|ies)/irP'
local fraud_yqv = '/nigerian? (?:national|government)/irP'
local fraud_yja = '/over-invoice/irP'
local fraud_ypo = '/the total sum/irP'
local fraud_uoq = '/vital documents/irP'
reconf['ADVANCE_FEE_2'] = string.format('((%s) | (%s) | (%s)) & ((%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) >= 2)', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], fraud_kjv, fraud_irj, fraud_neb, fraud_xjr, fraud_ezy, fraud_zfj, fraud_kdt, fraud_bgp, fraud_fbi, fraud_jbu, fraud_jyg, fraud_xvw, fraud_snt, fraud_ltx, fraud_mcq, fraud_pvn, fraud_fvu, fraud_ckf, fraud_fcw, fraud_mqo, fraud_tcc, fraud_gbw, fraud_nrg, fraud_rlx, fraud_axf, fraud_thj, fraud_yqv, fraud_yja, fraud_ypo, fraud_uoq, fraud_dbi, fraud_bep, fraud_dpr, fraud_qxx, fraud_qfy, fraud_pts, fraud_tdp, fraud_gan, fraud_ipk, fraud_aon, fraud_wny, fraud_aum, fraud_wfc, fraud_yww, fraud_ulk, fraud_iou, fraud_jnb, fraud_irt, fraud_etx, fraud_wdr, fraud_uuy, fraud_mly)
reconf['ADVANCE_FEE_3'] = string.format('((%s) | (%s) | (%s)) & ((%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) >= 3)', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], fraud_kjv, fraud_irj, fraud_neb, fraud_xjr, fraud_ezy, fraud_zfj, fraud_kdt, fraud_bgp, fraud_fbi, fraud_jbu, fraud_jyg, fraud_xvw, fraud_snt, fraud_ltx, fraud_mcq, fraud_pvn, fraud_fvu, fraud_ckf, fraud_fcw, fraud_mqo, fraud_tcc, fraud_gbw, fraud_nrg, fraud_rlx, fraud_axf, fraud_thj, fraud_yqv, fraud_yja, fraud_ypo, fraud_uoq, fraud_dbi, fraud_bep, fraud_dpr, fraud_qxx, fraud_qfy, fraud_pts, fraud_tdp, fraud_gan, fraud_ipk, fraud_aon, fraud_wny, fraud_aum, fraud_wfc, fraud_yww, fraud_ulk, fraud_iou, fraud_jnb, fraud_irt, fraud_etx, fraud_wdr, fraud_uuy, fraud_mly)

+ 491
- 0
rules/regexp/headers.lua Целия файл

@@ -0,0 +1,491 @@
-- Actually these regular expressions were obtained from SpamAssassin project, so they are licensed by apache license:
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to you under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at:
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Definitions of header regexps

local reconf = config['regexp']
local rspamd_regexp = require "rspamd_regexp"

-- Subject needs encoding
-- Define encodings types
local subject_encoded_b64 = 'Subject=/=\\?\\S+\\?B\\?/iX'
local subject_encoded_qp = 'Subject=/=\\?\\S+\\?Q\\?/iX'
-- Define whether subject must be encoded (contains non-7bit characters)
local subject_needs_mime = 'Subject=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/X'
-- Final rule
reconf['SUBJECT_NEEDS_ENCODING'] = string.format('!(%s) & !(%s) & (%s)', subject_encoded_b64, subject_encoded_qp, subject_needs_mime)

-- Detects that there is no space in From header (e.g. Some Name<some@host>)
reconf['R_NO_SPACE_IN_FROM'] = 'From=/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/X'

-- Detects missing subject
local has_subject = 'header_exists(Subject)'
local empty_subject = 'Subject=/^$/'
-- Final rule
reconf['MISSING_SUBJECT'] = string.format('!(%s) | (%s)', has_subject, empty_subject)

-- Detects bad content-transfer-encoding for text parts
-- For text parts (text/plain and text/html mainly)
local r_ctype_text = 'content_type_is_type(text)'
-- Content transfer encoding is 7bit
local r_cte_7bit = 'compare_transfer_encoding(7bit)'
-- And body contains 8bit characters
local r_body_8bit = '/[^\\x01-\\x7f]/Pr'
reconf['R_BAD_CTE_7BIT'] = string.format('(%s) & (%s) & (%s)', r_ctype_text, r_cte_7bit, r_body_8bit)

-- Detects missing To header
reconf['MISSING_TO']= '!raw_header_exists(To)';

-- Detects undisclosed recipients
local undisc_rcpt = 'To=/^<?undisclosed[- ]recipient/Hi'
reconf['R_UNDISC_RCPT'] = string.format('(%s) | (%s)', reconf['MISSING_TO'], undisc_rcpt)

-- Detects missing Message-Id
local has_mid = 'header_exists(Message-Id)'
reconf['MISSING_MID'] = '!header_exists(Message-Id)';

-- Received seems to be fake
reconf['R_RCVD_SPAMBOTS'] = 'Received=/^from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by [-.\\w+]{5,255}; [SMTWF][a-z][a-z], [\\s\\d]?\\d [JFMAJSOND][a-z][a-z] \\d{4} \\d{2}:\\d{2}:\\d{2} [-+]\\d{4}$/mH'

-- To header seems to be autogenerated
reconf['R_TO_SEEMS_AUTO'] = 'To=/^\\"?(?<bt>[-.\\w]{1,64})\\"?\\s<\\k<bt>\\@/H'

-- Charset is missing in message
reconf['R_MISSING_CHARSET']= string.format('content_type_is_type(text) & !content_type_has_param(charset) & !%s', r_cte_7bit);

-- Subject seems to be spam
reconf['R_SAJDING'] = 'Subject=/\\bsajding(?:om|a)?\\b/iH'

-- Find forged Outlook MUA
-- Yahoo groups messages
local yahoo_bulk = 'Received=/from \\[\\S+\\] by \\S+\\.(?:groups|scd|dcn)\\.yahoo\\.com with NNFMP/H'
-- Outlook MUA
local outlook_mua = 'X-Mailer=/^Microsoft Outlook\\b/H'
local any_outlook_mua = 'X-Mailer=/^Microsoft Outlook\\b/H'
reconf['FORGED_OUTLOOK_HTML'] = string.format('!%s & %s & %s', yahoo_bulk, outlook_mua, 'has_only_html_part()')

-- Recipients seems to be likely with each other (only works when recipients count is more than 5 recipients)
reconf['SUSPICIOUS_RECIPS'] = 'compare_recipients_distance(0.65)'

-- Recipients list seems to be sorted
reconf['SORTED_RECIPS'] = 'is_recipients_sorted()'

-- Spam string at the end of message to make statistics faults
reconf['TRACKER_ID'] = '/^[a-z0-9]{6,24}[-_a-z0-9]{2,36}[a-z0-9]{6,24}\\s*\\z/isPr'


-- From that contains encoded characters while base 64 is not needed as all symbols are 7bit
-- Regexp that checks that From header is encoded with base64 (search in raw headers)
local from_encoded_b64 = 'From=/\\=\\?\\S+\\?B\\?/iX'
-- From contains only 7bit characters (parsed headers are used)
local from_needs_mime = 'From=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
-- Final rule
reconf['FROM_EXCESS_BASE64'] = string.format('%s & !%s', from_encoded_b64, from_needs_mime)

-- From that contains encoded characters while quoted-printable is not needed as all symbols are 7bit
-- Regexp that checks that From header is encoded with quoted-printable (search in raw headers)
local from_encoded_qp = 'From=/\\=\\?\\S+\\?Q\\?/iX'
-- Final rule
reconf['FROM_EXCESS_QP'] = string.format('%s & !%s', from_encoded_qp, from_needs_mime)

-- To that contains encoded characters while base 64 is not needed as all symbols are 7bit
-- Regexp that checks that To header is encoded with base64 (search in raw headers)
local to_encoded_b64 = 'To=/\\=\\?\\S+\\?B\\?/iX'
-- To contains only 7bit characters (parsed headers are used)
local to_needs_mime = 'To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
-- Final rule
reconf['TO_EXCESS_BASE64'] = string.format('%s & !%s', to_encoded_b64, to_needs_mime)

-- To that contains encoded characters while quoted-printable is not needed as all symbols are 7bit
-- Regexp that checks that To header is encoded with quoted-printable (search in raw headers)
local to_encoded_qp = 'To=/\\=\\?\\S+\\?Q\\?/iX'
-- Final rule
reconf['TO_EXCESS_QP'] = string.format('%s & !%s', to_encoded_qp, to_needs_mime)

-- Reply-To that contains encoded characters while base 64 is not needed as all symbols are 7bit
-- Regexp that checks that Reply-To header is encoded with base64 (search in raw headers)
local replyto_encoded_b64 = 'Reply-To=/\\=\\?\\S+\\?B\\?/iX'
-- Reply-To contains only 7bit characters (parsed headers are used)
local replyto_needs_mime = 'Reply-To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
-- Final rule
reconf['REPLYTO_EXCESS_BASE64'] = string.format('%s & !%s', replyto_encoded_b64, replyto_needs_mime)

-- Reply-To that contains encoded characters while quoted-printable is not needed as all symbols are 7bit
-- Regexp that checks that Reply-To header is encoded with quoted-printable (search in raw headers)
local replyto_encoded_qp = 'Reply-To=/\\=\\?\\S+\\?Q\\?/iX'
-- Final rule
reconf['REPLYTO_EXCESS_QP'] = string.format('%s & !%s', replyto_encoded_qp, replyto_needs_mime)

-- Cc that contains encoded characters while base 64 is not needed as all symbols are 7bit
-- Regexp that checks that Cc header is encoded with base64 (search in raw headers)
local cc_encoded_b64 = 'Cc=/\\=\\?\\S+\\?B\\?/iX'
-- Co contains only 7bit characters (parsed headers are used)
local cc_needs_mime = 'Cc=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
-- Final rule
reconf['CC_EXCESS_BASE64'] = string.format('%s & !%s', cc_encoded_b64, cc_needs_mime)

-- Cc that contains encoded characters while quoted-printable is not needed as all symbols are 7bit
-- Regexp that checks that Cc header is encoded with quoted-printable (search in raw headers)
local cc_encoded_qp = 'Cc=/\\=\\?\\S+\\?Q\\?/iX'
-- Final rule
reconf['CC_EXCESS_QP'] = string.format('%s & !%s', cc_encoded_qp, cc_needs_mime)


-- Detect forged outlook headers
-- OE X-Mailer header
local oe_mua = 'X-Mailer=/\\bOutlook Express [456]\\./H'
-- OE Message ID format
local oe_msgid_1 = 'Message-Id=/^<?[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\\@hotmail\\.com>?$/mH'
local oe_msgid_2 = 'Message-Id=/^<?(?:[0-9a-f]{8}|[0-9a-f]{12})\\$[0-9a-f]{8}\\$[0-9a-f]{8}\\@\\S+>?$/H'
-- EZLM remail of message
local lyris_ezml_remailer = 'List-Unsubscribe=/<mailto:(?:leave-\\S+|\\S+-unsubscribe)\\@\\S+>$/H'
-- Header of wacky sendmail
local wacky_sendmail_version = 'Received=/\\/CWT\\/DCE\\)/H'
-- Iplanet received header
local iplanet_messaging_server = 'Received=/iPlanet Messaging Server/H'
-- Hotmail message id
local hotmail_baydav_msgid = 'Message-Id=/^<?BAY\\d+-DAV\\d+[A-Z0-9]{25}\\@phx\\.gbl?>$/H'
-- Sympatico message id
local sympatico_msgid = 'Message-Id=/^<?BAYC\\d+-PASMTP\\d+[A-Z0-9]{25}\\@CEZ\\.ICE>?$/H'
-- Mailman message id
local mailman_msgid = 'Message-ID=/^<mailman\\.\\d+\\.\\d+\\.\\d+\\..+\\@\\S+>$/H'
-- Message id seems to be forged
local unusable_msgid = string.format('(%s | %s | %s | %s | %s | %s)',
lyris_ezml_remailer, wacky_sendmail_version, iplanet_messaging_server, hotmail_baydav_msgid, sympatico_msgid, mailman_msgid)
-- Outlook express data seems to be forged
local forged_oe = string.format('(%s & !%s & !%s & !%s)', oe_mua, oe_msgid_1, oe_msgid_2, unusable_msgid)
-- Outlook specific headers
local outlook_dollars_mua = 'X-Mailer=/^Microsoft Outlook(?: 8| CWS, Build 9|, Build 10)\\./H'
local outlook_dollars_other = 'Message-Id=/^<?\\!\\~\\!>?/H'
local vista_msgid = 'Message-Id=/^<?[A-F\\d]{32}\\@\\S+>?$/H'
local ims_msgid = 'Message-Id=/^<?[A-F\\d]{36,40}\\@\\S+>?$/H'
-- Forged outlook headers
local forged_outlook_dollars = string.format('(%s & !%s & !%s & !%s & !%s & !%s)',
outlook_dollars_mua, oe_msgid_2, outlook_dollars_other, vista_msgid, ims_msgid, unusable_msgid)
-- Outlook versions that should be excluded from summary rule
local fmo_excl_o3416 = 'X-Mailer=/^Microsoft Outlook, Build 10.0.3416$/H'
local fmo_excl_oe3790 = 'X-Mailer=/^Microsoft Outlook Express 6.00.3790.3959$/H'
-- Summary rule for forged outlook
reconf['FORGED_MUA_OUTLOOK'] = string.format('(%s | %s) & !%s & !%s & !%s',
forged_oe, forged_outlook_dollars, fmo_excl_o3416, fmo_excl_oe3790, vista_msgid)

-- HTML outlook signs
local mime_html = 'content_type_is_type(text) & content_type_is_subtype(/.?html/)'
local tag_exists_html = 'has_html_tag(html)'
local tag_exists_head = 'has_html_tag(head)'
local tag_exists_meta = 'has_html_tag(meta)'
local tag_exists_body = 'has_html_tag(body)'
reconf['FORGED_OUTLOOK_TAGS'] = string.format('!%s & %s & %s & !(%s & %s & %s & %s)',
yahoo_bulk, any_outlook_mua, mime_html, tag_exists_html, tag_exists_head,
tag_exists_meta, tag_exists_body)

-- Forged OE/MSO boundary
reconf['SUSPICIOUS_BOUNDARY'] = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_(00EBFFA4|0102FFA4|32C6FFA4|3302FFA4)\\.[A-Z\\d]{8}"[\\r\\n]*$/siX'
-- Forged OE/MSO boundary
reconf['SUSPICIOUS_BOUNDARY2'] = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_(01C6527E)\\.[A-Z\\d]{8}"[\\r\\n]*$/siX'
-- Forged OE/MSO boundary
reconf['SUSPICIOUS_BOUNDARY3'] = 'Content-Type=/^\\s*multipart.+boundary="-----000-00\\d\\d-01C[\\dA-F]{5}-[\\dA-F]{8}"[\\r\\n]*$/siX'
-- Forged OE/MSO boundary
local suspicious_boundary_01C4 = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_01C4[\\dA-F]{4}\\.[A-Z\\d]{8}"[\\r\\n]*$/siX'
local suspicious_boundary_01C4_date = 'Date=/^\\s*\\w\\w\\w,\\s+\\d+\\s+\\w\\w\\w 20(0[56789]|1\\d)/'
reconf['SUSPICIOUS_BOUNDARY4'] = string.format('(%s) & (%s)', suspicious_boundary_01C4, suspicious_boundary_01C4_date)

-- Detect forged The Bat! headers
-- The Bat! X-Mailer header
local thebat_mua_any = 'X-Mailer=/^\\s*The Bat!/H'
-- The Bat! common Message-ID template
local thebat_msgid_common = 'Message-ID=/^<?\\d+\\.\\d+\\@\\S+>?$/mH'
-- Correct The Bat! Message-ID template
local thebat_msgid = 'Message-ID=/^<?\\d+\\.(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)([0-5]\\d)\\@\\S+>?/mH'
-- Summary rule for forged The Bat! Message-ID header
reconf['FORGED_MUA_THEBAT_MSGID'] = string.format('(%s) & !(%s) & (%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid)
-- Summary rule for forged The Bat! Message-ID header with unknown template
reconf['FORGED_MUA_THEBAT_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid)


-- Detect forged KMail headers
-- KMail User-Agent header
local kmail_mua = 'User-Agent=/^\\s*KMail\\/1\\.\\d+\\.\\d+/H'
-- KMail common Message-ID template
local kmail_msgid_common = 'Message-Id=/^<?\\s*\\d+\\.\\d+\\.\\S+\\@\\S+>?$/mH'
function kmail_msgid (task)
local regexp_text = '<(\\S+)>\\|(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)\\.\\d+\\.\\1$'
local re = rspamd_regexp.create_cached(regexp_text)
local header_msgid = task:get_header('Message-Id')
if header_msgid then
local header_from = task:get_header('From')
if header_from and re:match(header_from.."|"..header_msgid) then return true end
end
return false
end
-- Summary rule for forged KMail Message-ID header
reconf['FORGED_MUA_KMAIL_MSGID'] = string.format('(%s) & (%s) & !(%s) & !(%s)', kmail_mua, kmail_msgid_common, 'kmail_msgid', unusable_msgid)
-- Summary rule for forged KMail Message-ID header with unknown template
reconf['FORGED_MUA_KMAIL_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s)', kmail_mua, kmail_msgid_common, unusable_msgid)

-- Detect forged Opera Mail headers
-- Opera Mail User-Agent header
local opera1x_mua = 'User-Agent=/^\\s*Opera Mail\\/1[01]\\.\\d+ /H'
-- Opera Mail Message-ID template
local opera1x_msgid = 'Message-ID=/^<?op\\.[a-z\\d]{14}\\@\\S+>?$/H'
-- Suspicious Opera Mail User-Agent header
local suspicious_opera10w_mua = 'User-Agent=/^\\s*Opera Mail\\/10\\.\\d+ \\(Windows\\)$/H'
-- Suspicious Opera Mail Message-ID, apparently from KMail
local suspicious_opera10w_msgid = 'Message-Id=/^<?2009\\d{8}\\.\\d+\\.\\S+\\@\\S+?>$/H'
-- Summary rule for forged Opera Mail User-Agent header and Message-ID header from KMail
reconf['SUSPICIOUS_OPERA_10W_MSGID'] = string.format('(%s) & (%s)', suspicious_opera10w_mua, suspicious_opera10w_msgid)
-- Summary rule for forged Opera Mail Message-ID header
reconf['FORGED_MUA_OPERA_MSGID'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', opera1x_mua, opera1x_msgid, reconf['SUSPICIOUS_OPERA_10W_MSGID'], unusable_msgid)


-- Detect forged Mozilla Mail/Thunderbird/Seamonkey headers
-- Mozilla based X-Mailer
local user_agent_mozilla5 = 'User-Agent=/^\\s*Mozilla\\/5\\.0/H'
local user_agent_thunderbird = 'User-Agent=/^\\s*(Thunderbird|Mozilla Thunderbird|Mozilla\\/.*Gecko\\/.*Thunderbird\\/)/H'
local user_agent_seamonkey = 'User-Agent=/^\\s*Mozilla\\/5\\.0\\s.+\\sSeaMonkey\\/\\d+\\.\\d+/H'
local user_agent_mozilla = string.format('(%s) & !(%s) & !(%s)', user_agent_mozilla5, user_agent_thunderbird, user_agent_seamonkey)
-- Mozilla based common Message-ID template
local mozilla_msgid_common = 'Message-ID=/^\\s*<[\\dA-F]{8}\\.\\d{1,7}\\@([^>\\.]+\\.)+[^>\\.]+>$/H'
local mozilla_msgid = 'Message-ID=/^\\s*<(3[3-9A-F]|4[\\dA-F]|5[\\dA-F])[\\dA-F]{6}\\.(\\d0){1,4}\\d\\@([^>\\.]+\\.)+[^>\\.]+>$/H'
-- Summary rule for forged Mozilla Mail Message-ID header
reconf['FORGED_MUA_MOZILLA_MAIL_MSGID'] = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_mozilla, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
reconf['FORGED_MUA_MOZILLA_MAIL_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', user_agent_mozilla, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
-- Summary rule for forged Thunderbird Message-ID header
reconf['FORGED_MUA_THUNDERBIRD_MSGID'] = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_thunderbird, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
reconf['FORGED_MUA_THUNDERBIRD_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', user_agent_thunderbird, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
-- Summary rule for forged Seamonkey Message-ID header
reconf['FORGED_MUA_SEAMONKEY_MSGID'] = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_seamonkey, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
reconf['FORGED_MUA_SEAMONKEY_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', user_agent_seamonkey, mozilla_msgid_common, mozilla_msgid, unusable_msgid)


-- Message id validity
local sane_msgid = 'Message-Id=/^<?[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+>?\\s*$/H'
local msgid_comment = 'Message-Id=/\\(.*\\)/H'
reconf['INVALID_MSGID'] = string.format('(%s) & !((%s) | (%s))', has_mid, sane_msgid, msgid_comment)


-- Only Content-Type header without other MIME headers
local cd = 'header_exists(Content-Disposition)'
local cte = 'header_exists(Content-Transfer-Encoding)'
local ct = 'header_exists(Content-Type)'
local mime_version = 'raw_header_exists(MIME-Version)'
local ct_text_plain = 'content_type_is_type(text) & content_type_is_subtype(plain)'
reconf['MIME_HEADER_CTYPE_ONLY'] = string.format('!(%s) & !(%s) & (%s) & !(%s) & !(%s)', cd, cte, ct, mime_version, ct_text_plain)


-- Forged Exchange messages
local msgid_dollars_ok = 'Message-Id=/[0-9a-f]{4,}\\$[0-9a-f]{4,}\\$[0-9a-f]{4,}\\@\\S+/H'
local mimeole_ms = 'X-MimeOLE=/^Produced By Microsoft MimeOLE/H'
local rcvd_with_exchange = 'Received=/with Microsoft Exchange Server/H'
reconf['RATWARE_MS_HASH'] = string.format('(%s) & !(%s) & !(%s)', msgid_dollars_ok, mimeole_ms, rcvd_with_exchange)

-- Reply-type in content-type
reconf['STOX_REPLY_TYPE'] = 'Content-Type=/text\\/plain; .* reply-type=original/H'

-- Fake Verizon headers
local fhelo_verizon = 'X-Spam-Relays-Untrusted=/^[^\\]]+ helo=[^ ]+verizon\\.net /iH'
local fhost_verizon = 'X-Spam-Relays-Untrusted=/^[^\\]]+ rdns=[^ ]+verizon\\.net /iH'
reconf['FM_FAKE_HELO_VERIZON'] = string.format('(%s) & !(%s)', fhelo_verizon, fhost_verizon)

-- Forged yahoo msgid
local at_yahoo_msgid = 'Message-Id=/\\@yahoo\\.com\\b/iH'
local at_yahoogroups_msgid = 'Message-Id=/\\@yahoogroups\\.com\\b/iH'
local from_yahoo_com = 'From=/\\@yahoo\\.com\\b/iH'
reconf['FORGED_MSGID_YAHOO'] = string.format('(%s) & !(%s)', at_yahoo_msgid, from_yahoo_com)
local r_from_yahoo_groups = 'From=/rambler.ru\\@returns\\.groups\\.yahoo\\.com\\b/iH'
local r_from_yahoo_groups_ro = 'From=/ro.ru\\@returns\\.groups\\.yahoo\\.com\\b/iH'

-- Forged The Bat! MUA headers
local thebat_mua_v1 = 'X-Mailer=/^The Bat! \\(v1\\./H'
local ctype_has_boundary = 'Content-Type=/boundary/iH'
local bat_boundary = 'Content-Type=/boundary=\\"?-{10}/H'
local mailman_21 = 'X-Mailman-Version=/\\d/H'
reconf['FORGED_MUA_THEBAT_BOUN'] = string.format('(%s) & (%s) & !(%s) & !(%s)', thebat_mua_v1, ctype_has_boundary, bat_boundary, mailman_21)

-- Two received headers with ip addresses
local double_ip_spam_1 = 'Received=/from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by \\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3} with/H'
local double_ip_spam_2 = 'Received=/from\\s+\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\s+by\\s+\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3};/H'
reconf['RCVD_DOUBLE_IP_SPAM'] = string.format('(%s) | (%s)', double_ip_spam_1, double_ip_spam_2)

-- Quoted reply-to from yahoo (seems to be forged)
local repto_quote = 'Reply-To=/\\".*\\"\\s*\\</H'
local from_yahoo_com = 'From=/\\@yahoo\\.com\\b/iH'
local at_yahoo_msgid = 'Message-Id=/\\@yahoo\\.com\\b/iH'
reconf['REPTO_QUOTE_YAHOO'] = string.format('(%s) & ((%s) | (%s))', repto_quote, from_yahoo_com, at_yahoo_msgid)

-- MUA definitions
local xm_gnus = 'X-Mailer=/^Gnus v/H'
local xm_msoe5 = 'X-Mailer=/^Microsoft Outlook Express 5/H'
local xm_msoe6 = 'X-Mailer=/^Microsoft Outlook Express 6/H'
local xm_mso12 = 'X-Mailer=/^Microsoft(?: Office Outlook 12\\.0| Outlook 14\\.0)/H'
local xm_cgpmapi = 'X-Mailer=/^CommuniGate Pro MAPI Connector/H'
local xm_moz4 = 'X-Mailer=/^Mozilla 4/H'
local xm_skyri = 'X-Mailer=/^SKYRiXgreen/H'
local xm_wwwmail = 'X-Mailer=/^WWW-Mail \\d/H'
local ua_gnus = 'User-Agent=/^Gnus/H'
local ua_knode = 'User-Agent=/^KNode/H'
local ua_mutt = 'User-Agent=/^Mutt/H'
local ua_pan = 'User-Agent=/^Pan/H'
local ua_xnews = 'User-Agent=/^Xnews/H'
local no_inr_yes_ref = string.format('(%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s)', xm_gnus, xm_msoe5, xm_msoe6, xm_moz4, xm_skyri, xm_wwwmail, ua_gnus, ua_knode, ua_mutt, ua_pan, ua_xnews)
local subj_re = 'Subject=/^R[eE]:/H'
local has_ref = 'header_exists(References)'
local missing_ref = string.format('!(%s)', has_ref)
-- Fake reply (has RE in subject, but has not References header)
reconf['FAKE_REPLY_C'] = string.format('(%s) & (%s) & (%s) & !(%s)', subj_re, missing_ref, no_inr_yes_ref, xm_msoe6)

-- Mime-OLE is needed but absent (e.g. fake Outlook or fake Ecxchange)
local has_msmail_pri = 'header_exists(X-MSMail-Priority)'
local has_mimeole = 'header_exists(X-MimeOLE)'
local has_squirrelmail_in_mailer = 'X-Mailer=/SquirrelMail\\b/H'
local has_ips_php_in_mailer = 'X-Mailer=/^IPS PHP Mailer/'
reconf['MISSING_MIMEOLE'] = string.format('(%s) & !(%s) & !(%s) & !(%s) & !(%s) & !(%s)', has_msmail_pri, has_mimeole, has_squirrelmail_in_mailer, xm_mso12, xm_cgpmapi, has_ips_php_in_mailer)

-- Header delimiters
local yandex_from = 'From=/\\@(yandex\\.ru|yandex\\.net|ya\\.ru)/iX'
local yandex_x_envelope_from = 'X-Envelope-From=/\\@(yandex\\.ru|yandex\\.net|ya\\.ru)/iX'
local yandex_return_path = 'Return-Path=/\\@(yandex\\.ru|yandex\\.net|ya\\.ru)/iX'
local yandex_received = 'Received=/^\\s*from \\S+\\.(yandex\\.ru|yandex\\.net)/mH'
local yandex = string.format('(%s) & ((%s) | (%s) | (%s))', yandex_received, yandex_from, yandex_x_envelope_from, yandex_return_path)
-- Tabs as delimiters between header names and header values
function check_header_delimiter_tab(task, header_name)
for _,rh in ipairs(task:get_header_full(header_name)) do
if rh['tab_separated'] then return true end
end
return false
end
reconf['HEADER_FROM_DELIMITER_TAB'] = string.format('(%s) & !(%s)', 'check_header_delimiter_tab(From)', yandex)
reconf['HEADER_TO_DELIMITER_TAB'] = string.format('(%s) & !(%s)', 'check_header_delimiter_tab(To)', yandex)
reconf['HEADER_CC_DELIMITER_TAB'] = string.format('(%s) & !(%s)', 'check_header_delimiter_tab(Cc)', yandex)
reconf['HEADER_REPLYTO_DELIMITER_TAB'] = string.format('(%s) & !(%s)', 'check_header_delimiter_tab(Reply-To)', yandex)
reconf['HEADER_DATE_DELIMITER_TAB'] = string.format('(%s) & !(%s)', 'check_header_delimiter_tab(Date)', yandex)
-- Empty delimiters between header names and header values
function check_header_delimiter_empty(task, header_name)
for _,rh in ipairs(task:get_header_full(header_name)) do
if rh['empty_separator'] then return true end
end
return false
end
reconf['HEADER_FROM_EMPTY_DELIMITER'] = string.format('(%s)', 'check_header_delimiter_empty(From)')
reconf['HEADER_TO_EMPTY_DELIMITER'] = string.format('(%s)', 'check_header_delimiter_empty(To)')
reconf['HEADER_CC_EMPTY_DELIMITER'] = string.format('(%s)', 'check_header_delimiter_empty(Cc)')
reconf['HEADER_REPLYTO_EMPTY_DELIMITER'] = string.format('(%s)', 'check_header_delimiter_empty(Reply-To)')
reconf['HEADER_DATE_EMPTY_DELIMITER'] = string.format('(%s)', 'check_header_delimiter_empty(Date)')

-- Definitions of received headers regexp
reconf['RCVD_ILLEGAL_CHARS'] = 'Received=/[\\x80-\\xff]/X'

local MAIL_RU_Return_Path = 'Return-path=/^\\s*<.+\\@mail\\.ru>$/iX'
local MAIL_RU_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@mail\\.ru>$/iX'
local MAIL_RU_From = 'From=/\\@mail\\.ru>?$/iX'
local MAIL_RU_Received = 'Received=/from mail\\.ru \\(/mH'

reconf['FAKE_RECEIVED_mail_ru'] = string.format('(%s) & !(((%s) | (%s)) & (%s))', MAIL_RU_Received, MAIL_RU_Return_Path, MAIL_RU_X_Envelope_From, MAIL_RU_From)

local GMAIL_COM_Return_Path = 'Return-path=/^\\s*<.+\\@gmail\\.com>$/iX'
local GMAIL_COM_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@gmail\\.com>$/iX'
local GMAIL_COM_From = 'From=/\\@gmail\\.com>?$/iX'

local UKR_NET_Return_Path = 'Return-path=/^\\s*<.+\\@ukr\\.net>$/iX'
local UKR_NET_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@ukr\\.net>$/iX'
local UKR_NET_From = 'From=/\\@ukr\\.net>?$/iX'

local RECEIVED_smtp_yandex_ru_1 = 'Received=/from \\[\\d+\\.\\d+\\.\\d+\\.\\d+\\] \\((port=\\d+ )?helo=smtp\\.yandex\\.ru\\)/iX'
local RECEIVED_smtp_yandex_ru_2 = 'Received=/from \\[UNAVAILABLE\\] \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]:\\d+ helo=smtp\\.yandex\\.ru\\)/iX'
local RECEIVED_smtp_yandex_ru_3 = 'Received=/from \\S+ \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]:\\d+ helo=smtp\\.yandex\\.ru\\)/iX'
local RECEIVED_smtp_yandex_ru_4 = 'Received=/from \\[\\d+\\.\\d+\\.\\d+\\.\\d+\\] \\(account \\S+ HELO smtp\\.yandex\\.ru\\)/iX'
local RECEIVED_smtp_yandex_ru_5 = 'Received=/from smtp\\.yandex\\.ru \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]\\)/iX'
local RECEIVED_smtp_yandex_ru_6 = 'Received=/from smtp\\.yandex\\.ru \\(\\S+ \\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]\\)/iX'
local RECEIVED_smtp_yandex_ru_7 = 'Received=/from \\S+ \\(HELO smtp\\.yandex\\.ru\\) \\(\\S+\\@\\d+\\.\\d+\\.\\d+\\.\\d+\\)/iX'
local RECEIVED_smtp_yandex_ru_8 = 'Received=/from \\S+ \\(HELO smtp\\.yandex\\.ru\\) \\(\\d+\\.\\d+\\.\\d+\\.\\d+\\)/iX'
local RECEIVED_smtp_yandex_ru_9 = 'Received=/from \\S+ \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\] helo=smtp\\.yandex\\.ru\\)/iX'

reconf['FAKE_RECEIVED_smtp_yandex_ru'] = string.format('(((%s) & ((%s) | (%s))) | ((%s) & ((%s) | (%s))) | ((%s) & ((%s) | (%s)))) & (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s)', MAIL_RU_From, MAIL_RU_Return_Path, MAIL_RU_X_Envelope_From, GMAIL_COM_From, GMAIL_COM_Return_Path, GMAIL_COM_X_Envelope_From, UKR_NET_From, UKR_NET_Return_Path, UKR_NET_X_Envelope_From, RECEIVED_smtp_yandex_ru_1, RECEIVED_smtp_yandex_ru_2, RECEIVED_smtp_yandex_ru_3, RECEIVED_smtp_yandex_ru_4, RECEIVED_smtp_yandex_ru_5, RECEIVED_smtp_yandex_ru_6, RECEIVED_smtp_yandex_ru_7, RECEIVED_smtp_yandex_ru_8, RECEIVED_smtp_yandex_ru_9)

reconf['FORGED_GENERIC_RECEIVED'] = 'Received=/^\\s*(.+\\n)*from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by (([\\w\\d-]+\\.)+[a-zA-Z]{2,6}|\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}); \\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0/X'

reconf['FORGED_GENERIC_RECEIVED2'] = 'Received=/^\\s*(.+\\n)*from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by ([\\w\\d-]+\\.)+[a-z]{2,6} id [\\w\\d]{12}; \\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0/X'

reconf['FORGED_GENERIC_RECEIVED3'] = 'Received=/^\\s*(.+\\n)*by \\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3} with SMTP id [a-zA-Z]{14}\\.\\d{13};[\\r\\n\\s]*\\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0 \\(GMT\\)/X'

reconf['FORGED_GENERIC_RECEIVED4'] = 'Received=/^\\s*(.+\\n)*from localhost by \\S+;\\s+\\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0[\\s\\r\\n]*$/X'

rspamd_config.FORGED_GENERIC_RECEIVED5 = function (task)
local regexp_text = '^\\s*from \\[(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})\\].*\\n(.+\\n)*\\s*from \\1 by \\S+;\\s+\\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0$'
local re = rspamd_regexp.create_cached(regexp_text, 'i')
local headers_recv = task:get_header_full('Received')
if headers_recv then
for _,header_r in ipairs(headers_recv) do
if re:match(header_r['value']) then
return true
end
end
end
return false
end

reconf['INVALID_POSTFIX_RECEIVED'] = 'Received=/ \\(Postfix\\) with ESMTP id [A-Z\\d]+([\\s\\r\\n]+for <\\S+?>)?;[\\s\\r\\n]*[A-Z][a-z]{2}, \\d{1,2} [A-Z][a-z]{2} \\d\\d\\d\\d \\d\\d:\\d\\d:\\d\\d [\\+\\-]\\d\\d\\d\\d$/X'

rspamd_config.INVALID_EXIM_RECEIVED = function (task)
local checked = 0
local headers_to = task:get_header_full('To')
if headers_to then
local headers_recv = task:get_header_full('Received')
local regexp_text = '^[^\\n]*?<?\\S+?\\@(\\S+)>?\\|.*from \\d+\\.\\d+\\.\\d+\\.\\d+ \\(HELO \\S+\\)[\\s\\r\\n]*by \\1 with esmtp \\(\\S*?[\\?\\@\\(\\)\\s\\.\\+\\*\'\'\\/\\\\,]\\S*\\)[\\s\\r\\n]+id \\S*?[\\)\\(<>\\/\\\\,\\-:=]'
local re = rspamd_regexp.create_cached(regexp_text, 's')
if headers_recv then
for _,header_to in ipairs(headers_to) do
for _,header_r in ipairs(headers_recv) do
if re:match(header_to['value'].."|"..header_r['value']) then
return true
end
end
checked = checked + 1
if checked > 5 then
-- Stop on 5 rcpt
return false
end
end
end
end
return false
end

rspamd_config.INVALID_EXIM_RECEIVED2 = function (task)
local checked = 0
local headers_to = task:get_header_full('To')
if headers_to then
local headers_recv = task:get_header_full('Received')
local regexp_text = '^[^\\n]*?<?\\S+?\\@(\\S+)>?\\|.*from \\d+\\.\\d+\\.\\d+\\.\\d+ \\(HELO \\S+\\)[\\s\\r\\n]*by \\1 with esmtp \\([A-Z]{9,12} [A-Z]{5,6}\\)[\\s\\r\\n]+id [a-zA-Z\\d]{6}-[a-zA-Z\\d]{6}-[a-zA-Z\\d]{2}[\\s\\r\\n]+'
local re = rspamd_regexp.create_cached(regexp_text, 's')
if headers_recv then
for _,header_to in ipairs(headers_to) do
for _,header_r in ipairs(headers_recv) do
if re:match(header_to['value'].."|"..header_r['value']) then
return true
end
end
checked = checked + 1
if checked > 5 then
-- Stop on 5 rcpt
return false
end
end
end
end
return false
end

+ 33
- 0
rules/regexp/lotto.lua Целия файл

@@ -0,0 +1,33 @@
-- Actually these regular expressions were obtained from SpamAssassin project, so they are licensed by apache license:
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to you under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at:
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Rules that are specific for lotto spam messages

local reconf = config['regexp']

local r_lotto_from = 'From=/(?:lottery|News center|congratulation to you|NED INFO|BRITISH NATIONAL HEADQUATERS|MICROSOFT ON LINE SUPPORT TEAM|prize|online notification)/iH'
local r_lotto_subject = 'Subject=/(?:\\xA3\\d|pounds?|FINAL NOTIFICATION|FOR YOUR ATTENTION|File in Your Claims?|ATTN|prize|Claims requirement|amount|confirm|your e-mail address won|congratulations)/iH'
local r_lotto_body = '/(?:won|winning|\\xA3\\d|pounds?|GBP|LOTTERY|awards|prize)/isrP'
local kam_lotto1 = '/(e-?mail address (have emerged a winner|has won|attached to (ticket|reference)|was one of the ten winners)|random selection in our computerized email selection system)/isrP'
local kam_lotto2 = '/((ticket|serial|lucky) number|secret pin ?code|batch number|reference number|promotion date)/isrP'
local kam_lotto3 = '/(won|claim|cash prize|pounds? sterling)/isrP'
local kam_lotto4 = '/(claims (officer|agent)|lottery coordinator|fiduciary (officer|agent)|fiduaciary claims)/isrP'
local kam_lotto5 = '/(freelotto group|Royal Heritage Lottery|UK National (Online)? Lottery|U\\.?K\\.? Grand Promotions|Lottery Department UK|Euromillion Loteria|Luckyday International Lottery|International Lottery)/isrP'
local kam_lotto6 = '/(Dear Lucky Winner|Winning Notification|Attention:Winner|Dear Winner)/isrP'
local kam_lotto7 = 'Subject=/(Your Lucky Day|(Attention:|ONLINE) WINNER)/iH'
reconf['R_LOTTO'] = string.format('((%s) | (%s) | (%s)) & (((%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s)) >= 3)', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], r_lotto_from, r_lotto_subject, r_lotto_body, kam_lotto1, kam_lotto2, kam_lotto3, kam_lotto4, kam_lotto5, kam_lotto6)


+ 144
- 0
rules/rspamd.classifiers.lua Целия файл

@@ -0,0 +1,144 @@
--[[
Copyright (c) 2011-2015, Vsevolod Stakhov <vsevolod@highsecure.ru>
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
]]--

-- Detect language of message and selects appropriate statfiles for it

-- Common labels for specific statfiles
local many_recipients_label = 'many recipients'
local undisclosed_recipients_label = 'undisclosed recipients'
local list_label = 'maillist'
local long_subject_label = 'long subject'

-- Get specific statfiles set based on message rules
local function get_specific_statfiles(classifier, task)
if not table.foreach then
table.foreach = function(t, f)
for k, v in pairs(t) do f(k, v) end
end
end
local spec_st = {}
-- More 5 recipients
local st_many = classifier:get_statfile_by_label(many_recipients_label)
if st_many then
rcpt = task:get_recipients(2)
if rcpt and table.maxn(rcpt) > 5 then
print(table.maxn(rcpt))
table.foreach(st_many, function(i,v) table.insert(spec_st,v) end)
end
end
-- Undisclosed
local st_undisc = classifier:get_statfile_by_label(undisclosed_recipients_label)
if st_undisc then
rcpt = task:get_recipients(2)
if rcpt and table.maxn(rcpt) == 0 then
table.foreach(st_undisc, function(i,v) table.insert(spec_st,v) end)
end
end
-- Maillist
local st_maillist = classifier:get_statfile_by_label(list_label)
if st_maillist then
local unsub_header = task:get_header_raw('List-Unsubscribe')
if unsub_header then
table.foreach(st_maillist, function(i,v) table.insert(spec_st,v) end)
end
end
-- Long subject
local st_longsubj = classifier:get_statfile_by_label(long_subject_label)
if st_longsubj then
local subj = task:get_header_raw('Subject')
if subj and string.len(subj) > 150 then
table.foreach(st_longsubj, function(i,v) table.insert(spec_st,v) end)
end
end
if table.maxn(spec_st) > 1 then
return spec_st
else
return nil
end
end

classifiers['bayes'] = function(classifier, task, is_learn, is_spam)
-- Subfunction for detection of message's language
local detect_language = function(task)
local parts = task:get_text_parts()
for _,p in ipairs(parts) do
local l = p:get_language()
if l then
return l
end
end
return nil
end

-- Main procedure
local selected = {}
local spec_st = get_specific_statfiles(classifier, task)
if spec_st then
if is_learn then
return spec_st
else
-- Merge tables
table.foreach(spec_st, function(i,v) table.insert(selected,v) end)
end
end
-- Detect statfile by language
language = detect_language(task)
if language then
-- Find statfiles with specified language
for _,st in ipairs(classifier:get_statfiles()) do
-- Skip labeled statfiles
if not st:get_label() then
local st_l = st:get_param('language')
if st_l and st_l == language then
-- Insert statfile with specified language
table.insert(selected, st)
end
end
end
if table.maxn(selected) > 1 then
return selected
end
end

-- Language not detected or specific language statfiles have not been found
for _,st in ipairs(classifier:get_statfiles()) do
-- Skip labeled statfiles
if not st:get_label() then
local st_l = st:get_param('language')
-- Insert only statfiles without language
if not st_l then
table.insert(selected, st)
end
end
end
if table.maxn(selected) > 1 then
return selected
end
return nil
end


+ 120
- 0
rules/rspamd.lua Целия файл

@@ -0,0 +1,120 @@
--[[
Copyright (c) 2011-2015, Vsevolod Stakhov <vsevolod@highsecure.ru>
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
]]--

-- This is main lua config file for rspamd

config['regexp'] = {}

dofile('regexp/headers.lua')
dofile('regexp/lotto.lua')
dofile('regexp/fraud.lua')
dofile('regexp/drugs.lua')
dofile('html.lua')

local reconf = config['regexp']
local util = require "rspamd_util"

-- Uncategorized rules

-- Local rules
local r_bgcolor = '/BGCOLOR=/iP'
local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iP'
reconf['R_WHITE_ON_WHITE'] = string.format('(!(%s) & (%s))', r_bgcolor, r_font_color)
reconf['R_FLASH_REDIR_IMGSHACK'] = '/^(?:http:\\/\\/)?img\\d{1,5}\\.imageshack\\.us\\/\\S+\\.swf/U'

-- Different text parts
rspamd_config.R_PARTS_DIFFER = function(task)
local distance = task:get_mempool():get_variable('parts_distance', 'int')
if distance then
local nd = tonumber(distance)
if nd < 50 then
local score = 1 - util.tanh(nd / 100.0)
task:insert_result('R_PARTS_DIFFER', score, tostring(nd) .. '%')
end
end
return false
end

-- Date issues
rspamd_config.MISSING_DATE = function(task)
if rspamd_config:get_api_version() >= 5 then
if not task:get_header_raw('Date') then
return true
end
end
return false
end
rspamd_config.DATE_IN_FUTURE = function(task)
if rspamd_config:get_api_version() >= 5 then
local dm = task:get_date{format = 'message'}
local dt = task:get_date{format = 'connect'}
-- An 2 hour
if dm > 0 and dm - dt > 7200 then
return true
end
end
return false
end
rspamd_config.DATE_IN_PAST = function(task)
if rspamd_config:get_api_version() >= 5 then
local dm = task:get_date{format = 'message', gmt = true}
local dt = task:get_date{format = 'connect', gmt = true}
-- A day
if dm > 0 and dt - dm > 86400 then
return true
end
end
return false
end

local function file_exists(filename)
local file = io.open(filename)
if file then
io.close(file)
return true
else
return false
end
end

if file_exists('hfilter.lua') then
dofile('hfilter.lua')
end

if file_exists('rspamd.local.lua') then
dofile('rspamd.local.lua')
end

if file_exists('rspamd.classifiers.lua') then
dofile('rspamd.classifiers.lua')
end

Loading…
Отказ
Запис