--[[ Copyright (c) 2015, Vsevolod Stakhov Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ]]-- -- This plugin is intended to read and parse spamassassin rules with regexp -- rules. SA plugins or statistics are not supported local rspamd_logger = require "rspamd_logger" local rspamd_regexp = require "rspamd_regexp" local rspamd_expression = require "rspamd_expression" local rspamd_mempool = require "rspamd_mempool" local rspamd_trie = require "rspamd_trie" local util = require "rspamd_util" require "fun" () -- Known plugins local known_plugins = { 'Mail::SpamAssassin::Plugin::FreeMail', 'Mail::SpamAssassin::Plugin::HeaderEval', 'Mail::SpamAssassin::Plugin::ReplaceTags', 'Mail::SpamAssassin::Plugin::RelayEval', 'Mail::SpamAssassin::Plugin::MIMEEval', 'Mail::SpamAssassin::Plugin::BodyEval', 'Mail::SpamAssassin::Plugin::MIMEHeader', 'Mail::SpamAssassin::Plugin::WLBLEval', } -- Table that replaces SA symbol with rspamd equialent -- Used for dependency resolution local symbols_replacements = { -- SPF replacements USER_IN_SPF_WHITELIST = 'WHITELIST_SPF', USER_IN_DEF_SPF_WL = 'WHITELIST_SPF', SPF_PASS = 'R_SPF_ALLOW', SPF_FAIL = 'R_SPF_FAIL', SPF_SOFTFAIL = 'R_SPF_SOFTFAIL', SPF_HELO_PASS = 'R_SPF_ALLOW', SPF_HELLO_FAIL = 'R_SPF_FAIL', SPF_HELLO_SOFTFAIL = 'R_SPF_SOFTFAIL', -- DKIM replacements USER_IN_DKIM_WHITELIST = 'WHITELIST_DKIM', USER_IN_DEF_DKIM_WL = 'WHITELIST_DKIM', DKIM_VALID = 'R_DKIM_ALLOW', -- SURBL replacements URIBL_SBL_A = 'URIBL_SBL', URIBL_DBL_SPAM = 'DBL_SPAM', URIBL_DBL_PHISH = 'DBL_PHISH', URIBL_DBL_MALWARE = 'DBL_MALWARE', URIBL_DBL_BOTNETCC = 'DBL_BOTNET', URIBL_DBL_ABUSE_SPAM = 'DBL_ABUSE', URIBL_DBL_ABUSE_REDIR = 'DBL_ABUSE_REDIR', URIBL_DBL_ABUSE_MALW = 'DBL_ABUSE_MALWARE', URIBL_DBL_ABUSE_BOTCC = 'DBL_ABUSE_BOTNET', URIBL_WS_SURBL = 'WS_SURBL_MULTI', URIBL_PH_SURBL = 'PH_SURBL_MULTI', URIBL_MW_SURBL = 'MW_SURBL_MULTI', URIBL_CR_SURBL = 'CRACKED_SURBL', URIBL_ABUSE_SURBL = 'ABUSE_SURBL', -- Misc rules BODY_URI_ONLY = 'R_EMPTY_IMAGE', HTML_IMAGE_ONLY_04 = 'HTML_SHORT_LINK_IMG_1', HTML_IMAGE_ONLY_08 = 'HTML_SHORT_LINK_IMG_1', HTML_IMAGE_ONLY_12 = 'HTML_SHORT_LINK_IMG_1', HTML_IMAGE_ONLY_16 = 'HTML_SHORT_LINK_IMG_2', HTML_IMAGE_ONLY_20 = 'HTML_SHORT_LINK_IMG_2', HTML_IMAGE_ONLY_24 = 'HTML_SHORT_LINK_IMG_3', HTML_IMAGE_ONLY_28 = 'HTML_SHORT_LINK_IMG_3', HTML_IMAGE_ONLY_32 = 'HTML_SHORT_LINK_IMG_3', } -- Internal variables local rules = {} local atoms = {} local metas = {} local scores = {} local scores_added = {} local external_deps = {} local freemail_domains = {} local pcre_only_regexps = {} local freemail_trie local sa_mempool = rspamd_mempool.create() local replace = { tags = {}, pre = {}, inter = {}, post = {}, rules = {}, } local internal_regexp = { date_shift = rspamd_regexp.create("^\\(\\s*'((?:-?\\d+)|(?:undef))'\\s*,\\s*'((?:-?\\d+)|(?:undef))'\\s*\\)$") } -- Mail::SpamAssassin::Plugin::WLBLEval plugin local sa_lists = { from_blacklist = {}, from_whitelist = {}, from_def_whitelist = {}, to_blacklist = {}, to_whitelist = {}, elts = 0, } local func_cache = {} local section = rspamd_config:get_all_opt("spamassassin") -- Minimum score to treat symbols as meta local meta_score_alpha = 0.5 -- Maximum size of regexp checked local match_limit = 0 local function split(str, delim) local result = {} if not delim then delim = '[^%s]+' end for token in string.gmatch(str, delim) do table.insert(result, token) end return result end local function replace_symbol(s) local rspamd_symbol = symbols_replacements[s] if not rspamd_symbol then return s, false end return rspamd_symbol, true end local function trim(s) return s:match "^%s*(.-)%s*$" end local function is_pcre_only(name) if pcre_only_regexps[name] then return true end return false end local function handle_header_def(hline, cur_rule) --Now check for modifiers inside header's name local hdrs = split(hline, '[^|]+') local hdr_params = {} local cur_param = {} -- Check if an re is an ordinary re local ordinary = true for i,h in ipairs(hdrs) do if h == 'ALL' or h == 'ALL:raw' then ordinary = false cur_rule['type'] = 'function' -- Pack closure local re = cur_rule['re'] local not_f = cur_rule['not'] local sym = cur_rule['symbol'] -- Rule to match all headers rspamd_config:register_regexp({ re = re, type = 'allheader', pcre_only = is_pcre_only(cur_rule['symbol']), }) cur_rule['function'] = function(task) if not re then rspamd_logger.errx(task, 're is missing for rule %1', h) return 0 end return task:process_regexp(re, 'allheader') end else local args = split(h, '[^:]+') cur_param['strong'] = false cur_param['raw'] = false cur_param['header'] = args[1] if args[2] then -- We have some ops that are required for the header, so it's not ordinary ordinary = false end each(function(func) if func == 'addr' then cur_param['function'] = function(str) local addr_parsed = util.parse_addr(str) local ret = {} if addr_parsed then for i,elt in ipairs(addr_parsed) do if elt['addr'] then table.insert(ret, elt['addr']) end end end return ret end elseif func == 'name' then cur_param['function'] = function(str) local addr_parsed = util.parse_addr(str) local ret = {} if addr_parsed then for i,elt in ipairs(addr_parsed) do if elt['name'] then table.insert(ret, elt['name']) end end end return ret end elseif func == 'raw' then cur_param['raw'] = true elseif func == 'case' then cur_param['strong'] = true else rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2', func, cur_rule['symbol']) end end, tail(args)) local function split_hdr_param(param, headers) for i,h in ipairs(headers) do local nparam = {} for k,v in pairs(param) do if k ~= 'header' then nparam[k] = v end end nparam['header'] = h table.insert(hdr_params, nparam) end end -- Some header rules require splitting to check of multiple headers if cur_param['header'] == 'MESSAGEID' then -- Special case for spamassassin ordinary = false split_hdr_param(cur_param, { 'Message-ID', 'X-Message-ID', 'Resent-Message-ID'}) elseif cur_param['header'] == 'ToCc' then ordinary = false split_hdr_param(cur_param, { 'To', 'Cc', 'Bcc' }) else table.insert(hdr_params, cur_param) end end cur_rule['ordinary'] = ordinary cur_rule['header'] = hdr_params end end local function freemail_search(input) local res = 0 local function trie_callback(number, pos) rspamd_logger.debugx('Matched pattern %1 at pos %2', freemail_domains[number], pos) res = res + 1 end if input then freemail_trie:match(input, trie_callback, true) end return res end local function gen_eval_rule(arg) local eval_funcs = { {'check_freemail_from', function(task, remain) local from = task:get_from('mime') if from and from[1] then return freemail_search(string.lower(from[1]['addr'])) end return 0 end}, {'check_freemail_replyto', function(task, remain) return freemail_search(task:get_header('Reply-To')) end }, {'check_freemail_header', function(task, remain) -- Remain here contains one or two args: header and regexp to match local arg = string.match(remain, "^%(%s*['\"]([^%s]+)['\"]%s*%)$") local re = nil if not arg then arg, re = string.match(remain, "^%(%s*['\"]([^%s]+)['\"]%s*,%s*['\"]([^%s]+)['\"]%s*%)$") end if arg then local h = task:get_header(arg) if h then local hdr_freemail = freemail_search(string.lower(h)) if hdr_freemail > 0 and re then local r = rspamd_regexp.create_cached(re) if r then if r:match(h) then return 1 end else rspamd_logger.infox(rspamd_config, 'cannot create regexp %1', re) return 0 end end return hdr_freemail end end return 0 end }, { 'check_for_missing_to_header', function (task, remain) local th = task:get_recipients('mime') if not th or #th == 0 then return 1 end return 0 end }, { 'check_relays_unparseable', function(task, remain) local rh_mime = task:get_header_full('Received') local rh_parsed = task:get_received_headers() local rh_cnt = 0 if rh_mime then rh_cnt = #rh_mime end local parsed_cnt = 0 if rh_parsed then parsed_cnt = #rh_parsed end return rh_cnt - parsed_cnt end }, { 'check_for_shifted_date', function (task, remain) -- Remain here contains two args: start and end hours shift local matches = internal_regexp['date_shift']:search(remain, true, true) if matches and matches[1] then local min_diff = matches[1][2] local max_diff = matches[1][3] if min_diff == 'undef' then min_diff = 0 else min_diff = tonumber(min_diff) * 3600 end if max_diff == 'undef' then max_diff = 0 else max_diff = tonumber(max_diff) * 3600 end -- Now get the difference between Date and message received date local dm = task:get_date { format = 'message', gmt = true} local dt = task:get_date { format = 'connect', gmt = true} local diff = dm - dt if (max_diff == 0 and diff >= min_diff) or (min_diff == 0 and diff <= max_diff) or (diff >= min_diff and diff <= max_diff) then return 1 end end return 0 end }, { 'check_for_mime', function(task, remain) local arg = string.match(remain, "^%(%s*['\"]([^%s]+)['\"]%s*%)$") if arg then if arg == 'mime_attachment' then local parts = task:get_parts() if parts then for i,p in ipairs(parts) do if p:get_filename() then return 1 end end end else rspamd_logger.infox(task, 'unimplemented mime check %1', arg) end end return 0 end }, { 'check_from_in_blacklist', function(task, remain) local from = task:get_from('mime') if from and from[1] and from[1]['addr'] then if sa_lists['from_blacklist'][string.lower(from[1]['addr'])] then return 1 end end return 0 end }, { 'check_from_in_whitelist', function(task, remain) local from = task:get_from('mime') if from and from[1] and from[1]['addr'] then if sa_lists['from_whitelist'][string.lower(from[1]['addr'])] then return 1 end end return 0 end }, { 'check_from_in_default_whitelist', function(task, remain) local from = task:get_from('mime') if from and from[1] and from[1]['addr'] then if sa_lists['from_def_whitelist'][string.lower(from[1]['addr'])] then return 1 end end return 0 end }, { 'check_to_in_blacklist', function(task, remain) local rcpt = task:get_recipients('mime') if rcpt then for i,r in ipairs(rcpt) do if sa_lists['to_blacklist'][string.lower(r['addr'])] then return 1 end end end return 0 end }, { 'check_to_in_whitelist', function(task, remain) local rcpt = task:get_recipients('mime') if rcpt then for i,r in ipairs(rcpt) do if sa_lists['to_whitelist'][string.lower(r['addr'])] then return 1 end end end return 0 end }, } for k,f in ipairs(eval_funcs) do local pat = string.format('^%s', f[1]) local first,last = string.find(arg, pat) if first then local func_arg = string.sub(arg, last + 1) return function(task) return f[2](task, func_arg) end end end end -- Returns parser function or nil local function maybe_parse_sa_function(line) local arg local elts = split(line, '[^:]+') arg = elts[2] rspamd_logger.debugx(rspamd_config, 'trying to parse SA function %1 with args %2', elts[1], elts[2]) local substitutions = { {'^exists:', function(task) -- filter local hdrs_check = {} if arg == 'MESSAGEID' then hdrs_check = { 'Message-ID', 'X-Message-ID', 'Resent-Message-ID' } elseif arg == 'ToCc' then hdrs_check = { 'To', 'Cc', 'Bcc' } else hdrs_check = {arg} end for i,h in ipairs(hdrs_check) do if task:get_header(h) then return 1 end end return 0 end, }, {'^eval:', function(task) local func = func_cache[arg] if not func then func = gen_eval_rule(arg) func_cache[arg] = func end if not func then rspamd_logger.errx(task, 'cannot find appropriate eval rule for function %1', arg) else return func(task) end return 0 end }, } for k,s in ipairs(substitutions) do if string.find(line, s[1]) then return s[2] end end return nil end local function words_to_re(words, start) return table.concat(totable(drop_n(start, words)), " "); end local function process_tflags(rule, flags) each(function(flag) if flag == 'publish' then rule['publish'] = true elseif flag == 'multiple' then rule['multiple'] = true elseif string.match(flag, '^maxhits=(%d+)$') then rule['maxhits'] = tonumber(string.match(flag, '^maxhits=(%d+)$')) elseif flag == 'nice' then rule['nice'] = true end end, drop_n(1, flags)) if rule['re'] then if rule['maxhits'] then rule['re']:set_max_hits(rule['maxhits']) elseif rule['multiple'] then rule['re']:set_max_hits(0) else rule['re']:set_max_hits(1) end end end local function process_replace(words, tbl) local re = words_to_re(words, 2) tbl[words[2]] = re end local function process_sa_conf(f) local cur_rule = {} local valid_rule = false local function insert_cur_rule() if cur_rule['type'] ~= 'meta' and cur_rule['publish'] then -- Create meta rule from this rule local nsym = '__fake' .. cur_rule['symbol'] local nrule = { type = 'meta', symbol = cur_rule['symbol'], score = cur_rule['score'], meta = nsym, description = cur_rule['description'], } rules[nrule['symbol']] = nrule cur_rule['symbol'] = nsym end -- We have previous rule valid if not cur_rule['symbol'] then rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', cur_rule) end rules[cur_rule['symbol']] = cur_rule cur_rule = {} valid_rule = false end local function parse_score(words) if #words == 3 then -- score rule rspamd_logger.debugx(rspamd_config, 'found score for %1: %2', words[2], words[3]) return tonumber(words[3]) elseif #words == 6 then -- score rule -- we assume here that bayes and network are enabled and select rspamd_logger.debugx(rspamd_config, 'found score for %1: %2', words[2], words[6]) return tonumber(words[6]) else rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2]) end return 0 end local skip_to_endif = false local if_nested = 0 for l in f:lines() do (function () l = trim(l) if string.len(l) == 0 or string.sub(l, 1, 1) == '#' then return end if skip_to_endif then if string.match(l, '^endif') then if_nested = if_nested - 1 if if_nested == 0 then skip_to_endif = false end elseif string.match(l, '^if') then if_nested = if_nested + 1 end return else if string.match(l, '^ifplugin') then local ls = split(l) if not any(function(pl) if pl == ls[2] then return true end return false end, known_plugins) then skip_to_endif = true if_nested = 1 end elseif string.match(l, '^if !plugin%(') then local pname = string.match(l, '^if !plugin%(([A-Za-z:]+)%)') if any(function(pl) if pl == pname then return true end return false end, known_plugins) then skip_to_endif = true if_nested = 1 end elseif string.match(l, '^if') then -- Unknown if skip_to_endif = true if_nested = 1 end end local slash = string.find(l, '/') -- Skip comments words = totable(take_while( function(w) return string.sub(w, 1, 1) ~= '#' end, filter(function(w) return w ~= "" end, iter(split(l))))) if words[1] == "header" or words[1] == 'mimeheader' then -- header SYMBOL Header ~= /regexp/ if valid_rule then insert_cur_rule() end if words[4] and (words[4] == '=~' or words[4] == '!~') then cur_rule['type'] = 'header' cur_rule['symbol'] = words[2] if words[4] == '!~' then cur_rule['not'] = true end cur_rule['re_expr'] = words_to_re(words, 4) local unset_comp = string.find(cur_rule['re_expr'], '%s+%[if%-unset:') if unset_comp then -- We have optional part that needs to be processed unset = string.match(string.sub(cur_rule['re_expr'], unset_comp), '%[if%-unset:%s*([^%]%s]+)]') cur_rule['unset'] = unset -- Cut it down cur_rule['re_expr'] = string.sub(cur_rule['re_expr'], 1, unset_comp - 1) end cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr']) if not cur_rule['re'] then rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2", cur_rule['re_expr'], cur_rule['symbol']) else cur_rule['re']:set_max_hits(1) handle_header_def(words[3], cur_rule) end if words[1] == 'mimeheader' then cur_rule['mime'] = true else cur_rule['mime'] = false end if cur_rule['re'] and cur_rule['symbol'] and (cur_rule['header'] or cur_rule['function']) then valid_rule = true cur_rule['re']:set_max_hits(1) if cur_rule['header'] and cur_rule['ordinary'] then for i,h in ipairs(cur_rule['header']) do if type(h) == 'string' then if cur_rule['mime'] then rspamd_config:register_regexp({ re = cur_rule['re'], type = 'mimeheader', header = h, pcre_only = is_pcre_only(cur_rule['symbol']), }) else rspamd_config:register_regexp({ re = cur_rule['re'], type = 'header', header = h, pcre_only = is_pcre_only(cur_rule['symbol']), }) end else h['mime'] = cur_rule[mime] if cur_rule['mime'] then rspamd_config:register_regexp({ re = cur_rule['re'], type = 'mimeheader', header = h['header'], pcre_only = is_pcre_only(cur_rule['symbol']), }) else if h['raw'] then rspamd_config:register_regexp({ re = cur_rule['re'], type = 'rawheader', header = h['header'], pcre_only = is_pcre_only(cur_rule['symbol']), }) else rspamd_config:register_regexp({ re = cur_rule['re'], type = 'header', header = h['header'], pcre_only = is_pcre_only(cur_rule['symbol']), }) end end end end cur_rule['re']:set_limit(match_limit) cur_rule['re']:set_max_hits(1) end end else -- Maybe we know the function and can convert it local args = words_to_re(words, 2) local func = maybe_parse_sa_function(args) if func then cur_rule['type'] = 'function' cur_rule['symbol'] = words[2] cur_rule['function'] = func valid_rule = true else rspamd_logger.infox(rspamd_config, 'unknown function %1', args) end end elseif words[1] == "body" then -- body SYMBOL /regexp/ if valid_rule then insert_cur_rule() end cur_rule['symbol'] = words[2] if words[3] and (string.sub(words[3], 1, 1) == '/' or string.sub(words[3], 1, 1) == 'm') then cur_rule['type'] = 'sabody' cur_rule['re_expr'] = words_to_re(words, 2) cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr']) if cur_rule['re'] then rspamd_config:register_regexp({ re = cur_rule['re'], type = 'sabody', pcre_only = is_pcre_only(cur_rule['symbol']), }) valid_rule = true cur_rule['re']:set_limit(match_limit) cur_rule['re']:set_max_hits(1) end else -- might be function local args = words_to_re(words, 2) local func = maybe_parse_sa_function(args) if func then cur_rule['type'] = 'function' cur_rule['symbol'] = words[2] cur_rule['function'] = func valid_rule = true else rspamd_logger.infox(rspamd_config, 'unknown function %1', args) end end elseif words[1] == "rawbody" then -- body SYMBOL /regexp/ if valid_rule then insert_cur_rule() end cur_rule['symbol'] = words[2] if words[3] and (string.sub(words[3], 1, 1) == '/' or string.sub(words[3], 1, 1) == 'm') then cur_rule['type'] = 'sarawbody' cur_rule['re_expr'] = words_to_re(words, 2) cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr']) if cur_rule['re'] then rspamd_config:register_regexp({ re = cur_rule['re'], type = 'sarawbody', pcre_only = is_pcre_only(cur_rule['symbol']), }) valid_rule = true cur_rule['re']:set_limit(match_limit) cur_rule['re']:set_max_hits(1) end else -- might be function local args = words_to_re(words, 2) local func = maybe_parse_sa_function(args) if func then cur_rule['type'] = 'function' cur_rule['symbol'] = words[2] cur_rule['function'] = func valid_rule = true else rspamd_logger.infox(rspamd_config, 'unknown function %1', args) end end elseif words[1] == "full" then -- body SYMBOL /regexp/ if valid_rule then insert_cur_rule() end cur_rule['symbol'] = words[2] if words[3] and (string.sub(words[3], 1, 1) == '/' or string.sub(words[3], 1, 1) == 'm') then cur_rule['type'] = 'message' cur_rule['re_expr'] = words_to_re(words, 2) cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr']) cur_rule['raw'] = true if cur_rule['re'] then valid_rule = true rspamd_config:register_regexp({ re = cur_rule['re'], type = 'body', pcre_only = is_pcre_only(cur_rule['symbol']), }) cur_rule['re']:set_limit(match_limit) cur_rule['re']:set_max_hits(1) end else -- might be function local args = words_to_re(words, 2) local func = maybe_parse_sa_function(args) if func then cur_rule['type'] = 'function' cur_rule['symbol'] = words[2] cur_rule['function'] = func valid_rule = true else rspamd_logger.infox(rspamd_config, 'unknown function %1', args) end end elseif words[1] == "uri" then -- uri SYMBOL /regexp/ if valid_rule then insert_cur_rule() end cur_rule['type'] = 'uri' cur_rule['symbol'] = words[2] cur_rule['re_expr'] = words_to_re(words, 2) cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr']) if cur_rule['re'] and cur_rule['symbol'] then valid_rule = true rspamd_config:register_regexp({ re = cur_rule['re'], type = 'url', pcre_only = is_pcre_only(cur_rule['symbol']), }) cur_rule['re']:set_limit(match_limit) cur_rule['re']:set_max_hits(1) end elseif words[1] == "meta" then -- meta SYMBOL expression if valid_rule then insert_cur_rule() end cur_rule['type'] = 'meta' cur_rule['symbol'] = words[2] cur_rule['meta'] = words_to_re(words, 2) if cur_rule['meta'] and cur_rule['symbol'] then valid_rule = true end elseif words[1] == "describe" and valid_rule then cur_rule['description'] = words_to_re(words, 2) elseif words[1] == "score" then scores[words[2]] = parse_score(words) elseif words[1] == 'freemail_domains' then each(function(dom) table.insert(freemail_domains, '@' .. dom) end, drop_n(1, words)) elseif words[1] == 'blacklist_from' then sa_lists['from_blacklist'][words[2]] = 1 sa_lists['elts'] = sa_lists['elts'] + 1 elseif words[1] == 'whitelist_from' then sa_lists['from_whitelist'][words[2]] = 1 sa_lists['elts'] = sa_lists['elts'] + 1 elseif words[1] == 'whitelist_to' then sa_lists['to_whitelist'][words[2]] = 1 sa_lists['elts'] = sa_lists['elts'] + 1 elseif words[1] == 'blacklist_to' then sa_lists['to_blacklist'][words[2]] = 1 sa_lists['elts'] = sa_lists['elts'] + 1 elseif words[1] == 'tflags' then process_tflags(cur_rule, words) elseif words[1] == 'replace_tag' then process_replace(words, replace['tags']) elseif words[1] == 'replace_pre' then process_replace(words, replace['pre']) elseif words[1] == 'replace_inter' then process_replace(words, replace['inter']) elseif words[1] == 'replace_post' then process_replace(words, replace['post']) elseif words[1] == 'replace_rules' then each(function(r) table.insert(replace['rules'], r) end, drop_n(1, words)) end end)() end if valid_rule then insert_cur_rule() end end -- Now check all valid rules and add the according rspamd rules local function calculate_score(sym, rule) if all(function(c) return c == '_' end, take_n(2, iter(sym))) then return 0.0 end if rule['nice'] or (rule['score'] and rule['score'] < 0.0) then return -1.0 end return 1.0 end local function add_sole_meta(sym, rule) local r = { type = 'meta', meta = rule['symbol'], score = rule['score'], description = rule['description'] } rules[sym] = r end local function sa_regexp_match(data, re, raw, rule) local res = 0 if not re then return 0 end if rule['multiple'] then local lim = -1 if rule['maxhits'] then lim = rule['maxhits'] end res = res + re:matchn(data, lim, raw) else if re:match(data, raw) then res = 1 end end return res end local function apply_replacements(str) local pre = "" local post = "" local inter = "" local function check_specific_tag(prefix, s, tbl) local replacement = nil local ret = s each(function(n, t) local ns,matches = string.gsub(s, string.format("<%s%s>", prefix, n), "") if matches > 0 then replacement = t ret = ns end end, tbl) return ret,replacement end local repl str,repl = check_specific_tag("pre ", str, replace['pre']) if repl then pre = repl end str,repl = check_specific_tag("inter ", str, replace['inter']) if repl then inter = repl end str,repl = check_specific_tag("post ", str, replace['post']) if repl then post = repl end -- XXX: ugly hack if inter then str = string.gsub(str, "><", string.format(">%s<", inter)) end local function replace_all_tags(s) local str, matches str = s each(function(n, t) str,matches = string.gsub(str, string.format("<%s>", n), string.format("%s%s%s", pre, t, post)) end, replace['tags']) return str end local s = replace_all_tags(str) if str ~= s then return true,s end return false,str end local function parse_atom(str) local atom = table.concat(totable(take_while(function(c) if string.find(', \t()><+!|&\n', c) then return false end return true end, iter(str))), '') return atom end local function process_atom(atom, task) local atom_cb = atoms[atom] if atom_cb then local res = atom_cb(task) if not res then rspamd_logger.debugx(task, 'atom: %1, NULL result', atom) elseif res > 0 then rspamd_logger.debugx(task, 'atom: %1, result: %2', atom, res) end return res else local ext_dep = external_deps[atom] if ext_dep then local res = 0 if type(ext_dep) == 'number' then if task:has_symbol(atom) then res = 1 end elseif type(ext_dep) == 'string' then -- Apply replacement if task:has_symbol(ext_dep) then res = 1 end end rspamd_logger.debugx(task, 'external atom: %1, result: %2', atom, res) return res else rspamd_logger.debugx(task, 'Cannot find atom ' .. atom) end end return 0 end local function post_process() -- Replace rule tags local ntags = {} local function rec_replace_tags(tag, tagv) if ntags[tag] then return ntags[tag] end each(function(n, t) if n ~= tag then local s, matches = string.gsub(tagv, string.format("<%s>", n), t) if matches > 0 then ntags[tag] = rec_replace_tags(tag, s) end end end, replace['tags']) if not ntags[tag] then ntags[tag] = tagv end return ntags[tag] end each(function(n, t) rec_replace_tags(n, t) end, replace['tags']) each(function(n, t) replace['tags'][n] = t end, ntags) each(function(r) local rule = rules[r] if rule['re_expr'] and rule['re'] then local res, nexpr = apply_replacements(rule['re_expr']) if res then local nre = rspamd_regexp.create(nexpr) if not nre then rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %1', r) --rule['re'] = nil else local old_max_hits = rule['re']:get_max_hits() rspamd_logger.debugx(rspamd_config, 'replace %1 -> %2', r, nexpr) rspamd_config:replace_regexp({ old_re = rule['re'], new_re = nre }) rule['re'] = nre rule['re_expr'] = nexpr nre:set_limit(match_limit) nre:set_max_hits(old_max_hits) end end end end, replace['rules']) each(function(key, score) if rules[key] then rules[key]['score'] = score end end, scores) -- Header rules each(function(k, r) local f = function(task) local raw = false local check = {} -- Cached path for ordinary expressions if r['ordinary'] then local h = r['header'][1] local t = 'header' if h['raw'] then t = 'rawheader' end if not r['re'] then rspamd_logger.errx(task, 're is missing for rule %1 (%2 header)', k, h['header']) return 0 end local ret = task:process_regexp(r['re'], t, h['header'], h['strong']) if r['not'] then if ret ~= 0 then ret = 0 else ret = 1 end end return ret end -- Slow path each(function(h) local headers = {} local hname = h['header'] local hdr if h['mime'] then local parts = task:get_parts() for i, p in ipairs(parts) do local m_hdr = p:get_header_full(hname, h['strong']) if m_hdr then if not hdr then hdr = {} end for k, mh in ipairs(m_hdr) do table.insert(hdr, mh) end end end else hdr = task:get_header_full(hname, h['strong']) end if hdr then for n, rh in ipairs(hdr) do -- Subject for optimization local str if h['raw'] then str = rh['value'] raw = true else str = rh['decoded'] end if not str then return 0 end if h['function'] then str = h['function'](str) end if type(str) == 'string' then table.insert(check, str) else for ii, c in ipairs(str) do table.insert(check, c) end end end elseif r['unset'] then table.insert(check, r['unset']) end end, r['header']) if #check == 0 then if r['not'] then return 1 end return 0 end local ret = 0 for i, c in ipairs(check) do local match = sa_regexp_match(c, r['re'], raw, r) if (match > 0 and not r['not']) or (match == 0 and r['not']) then ret = 1 end end return ret end if r['score'] then local real_score = r['score'] * calculate_score(k, r) if math.abs(real_score) > meta_score_alpha then add_sole_meta(k, r) end end atoms[k] = f end, filter(function(k, r) return r['type'] == 'header' and r['header'] end, rules)) -- Custom function rules each(function(k, r) local f = function(task) local res = r['function'](task) if res and res > 0 then return res end return 0 end if r['score'] then local real_score = r['score'] * calculate_score(k, r) if math.abs(real_score) > meta_score_alpha then add_sole_meta(k, r) end end atoms[k] = f end, filter(function(k, r) return r['type'] == 'function' and r['function'] end, rules)) -- Parts rules each(function(k, r) local f = function(task) if not r['re'] then rspamd_logger.errx(task, 're is missing for rule %1', k) return 0 end local t = 'mime' if r['raw'] then t = 'rawmime' end return task:process_regexp(r['re'], t) end if r['score'] then local real_score = r['score'] * calculate_score(k, r) if math.abs(real_score) > meta_score_alpha then add_sole_meta(k, r) end end atoms[k] = f end, filter(function(k, r) return r['type'] == 'part' end, rules)) -- SA body rules each(function(k, r) local f = function(task) if not r['re'] then rspamd_logger.errx(task, 're is missing for rule %1', k) return 0 end local t = r['type'] local ret = task:process_regexp(r['re'], t) return ret end if r['score'] then local real_score = r['score'] * calculate_score(k, r) if math.abs(real_score) > meta_score_alpha then add_sole_meta(k, r) end end atoms[k] = f end, filter(function(k, r) return r['type'] == 'sabody' or r['type'] == 'message' or r['type'] == 'sarawbody' end, rules)) -- URL rules each(function(k, r) local f = function(task) if not r['re'] then rspamd_logger.errx(task, 're is missing for rule %1', k) return 0 end return task:process_regexp(r['re'], 'url') end if r['score'] then local real_score = r['score'] * calculate_score(k, r) if math.abs(real_score) > meta_score_alpha then add_sole_meta(k, r) end end atoms[k] = f end, filter(function(k, r) return r['type'] == 'uri' end, rules)) -- Meta rules each(function(k, r) local expression = nil -- Meta function callback local meta_cb = function(task) local res = 0 local trace = {} -- XXX: need to memoize result for better performance local sym = task:has_symbol(k) if not sym then if expression then res,trace = expression:process_traced(task) end if res > 0 then -- Symbol should be one shot to make it working properly task:insert_result(k, res, trace) end else res = 1 end return res end expression = rspamd_expression.create(r['meta'], {parse_atom, process_atom}, sa_mempool) if not expression then rspamd_logger.errx(rspamd_config, 'Cannot parse expression ' .. r['meta']) else if r['score'] then rspamd_config:set_metric_symbol({ name = k, score = r['score'], description = r['description'], priority = 2, one_shot = true }) scores_added[k] = 1 end rspamd_config:register_symbol({ name = k, weight = calculate_score(k, r), callback = meta_cb }) r['expression'] = expression if not atoms[k] then atoms[k] = meta_cb end end end, filter(function(k, r) return r['type'] == 'meta' end, rules)) -- Check meta rules for foreign symbols and register dependencies -- First direct dependencies: each(function(k, r) if r['expression'] then local expr_atoms = r['expression']:atoms() for i,a in ipairs(expr_atoms) do if not atoms[a] then local rspamd_symbol, replaced_symbol = replace_symbol(a) rspamd_logger.debugx('atom %1 is a direct foreign dependency, ' .. 'register dependency for %2 on %3', a, k, rspamd_symbol) rspamd_config:register_dependency(k, rspamd_symbol) if not external_deps[k] then external_deps[k] = {rspamd_symbol} else table.insert(external_deps[k], rspamd_symbol) end end end end end, filter(function(k, r) return r['type'] == 'meta' end, rules)) -- ... And then indirect ones ... each(function(k, r) if r['expression'] then local expr_atoms = r['expression']:atoms() for i,a in ipairs(expr_atoms) do if type(external_deps[a]) == 'table' then for _,dep in ipairs(external_deps[a]) do rspamd_logger.debugx('atom %1 holds a foreign dependency, ' .. 'register dependency for %2 on %3', a, k, dep); rspamd_config:register_dependency(k, dep) end else local rspamd_symbol, replaced_symbol = replace_symbol(a) if replaced_symbol then external_deps[a] = rspamd_symbol else external_deps[a] = 1 end end end end end, filter(function(k, r) return r['type'] == 'meta' end, rules)) -- Set missing symbols each(function(key, score) if not scores_added[key] then rspamd_config:set_metric_symbol({ name = key, score = score, priority = 2, flags = 'ignore'}) end end, scores) -- Logging output if freemail_domains then freemail_trie = rspamd_trie.create(freemail_domains) rspamd_logger.infox(rspamd_config, 'loaded %1 freemail domains definitions', #freemail_domains) end rspamd_logger.infox(rspamd_config, 'loaded %1 blacklist/whitelist elements', sa_lists['elts']) end local has_rules = false if type(section) == "table" then for k, fn in pairs(section) do if k == 'alpha' and type(fn) == 'number' then meta_score_alpha = fn elseif k == 'match_limit' and type(fn) == 'number' then match_limit = fn elseif k == 'pcre_only' and type(fn) == 'table' then for i,s in ipairs(fn) do pcre_only_regexps[s] = 1 end else if type(fn) == 'table' then for k, elt in ipairs(fn) do local files = util.glob(elt) for i,matched in ipairs(files) do f = io.open(matched, "r") if f then process_sa_conf(f) has_rules = true else rspamd_logger.errx(rspamd_config, "cannot open %1", matched) end end end else -- assume string local files = util.glob(fn) for i,matched in ipairs(files) do f = io.open(matched, "r") if f then process_sa_conf(f) has_rules = true else rspamd_logger.errx(rspamd_config, "cannot open %1", matched) end end end end end end if has_rules then post_process() end