diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-12-10 16:28:39 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-12-10 16:28:39 +0000 |
commit | 8e57fc2c1d7b3d5f7a9839f140e3502ee69e3d61 (patch) | |
tree | 6978e443012972b6e16d1539dbe1f71fc551e774 /src/plugins | |
parent | 74059e7f272613b677c90df1a428047d6bb0182d (diff) | |
download | rspamd-8e57fc2c1d7b3d5f7a9839f140e3502ee69e3d61.tar.gz rspamd-8e57fc2c1d7b3d5f7a9839f140e3502ee69e3d61.zip |
Allow to exclude some regexps from hyperscan
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/lua/spamassassin.lua | 475 |
1 files changed, 252 insertions, 223 deletions
diff --git a/src/plugins/lua/spamassassin.lua b/src/plugins/lua/spamassassin.lua index d714ecc5b..f0243acfa 100644 --- a/src/plugins/lua/spamassassin.lua +++ b/src/plugins/lua/spamassassin.lua @@ -55,7 +55,9 @@ local metas = {} local scores = {} local external_deps = {} local freemail_domains = {} +local pcre_only_regexps = {} local freemail_trie +local sa_mempool = rspamd_mempool.create() local replace = { tags = {}, pre = {}, @@ -93,6 +95,13 @@ local function trim(s) return s:match "^%s*(.-)%s*$" end +local function is_pcre_only(name) + if pcre_only_regexps[name] then + return true + end + return false +end + local function handle_header_def(hline, cur_rule) --Now check for modifiers inside header's name local hdrs = split(hline, '[^|]+') @@ -112,7 +121,8 @@ local function handle_header_def(hline, cur_rule) -- Rule to match all headers rspamd_config:register_regexp({ re = re, - type = 'allheader' + type = 'allheader', + pcre_only = is_pcre_only(cur_rule['symbol']), }) cur_rule['function'] = function(task) if not re then @@ -609,19 +619,23 @@ local function process_sa_conf(f) rspamd_config:register_regexp({ re = cur_rule['re'], type = 'header', - header = h}) + header = h, + pcre_only = is_pcre_only(cur_rule['symbol']), + }) else if h['raw'] then rspamd_config:register_regexp({ re = cur_rule['re'], type = 'rawheader', - header = h['header'] + header = h['header'], + pcre_only = is_pcre_only(cur_rule['symbol']), }) else rspamd_config:register_regexp({ re = cur_rule['re'], type = 'header', - header = h['header'] + header = h['header'], + pcre_only = is_pcre_only(cur_rule['symbol']), }) end end @@ -662,6 +676,7 @@ local function process_sa_conf(f) rspamd_config:register_regexp({ re = cur_rule['re'], type = 'rawmime', + pcre_only = is_pcre_only(cur_rule['symbol']), }) valid_rule = true cur_rule['re']:set_limit(match_limit) @@ -700,6 +715,7 @@ local function process_sa_conf(f) rspamd_config:register_regexp({ re = cur_rule['re'], type = 'body', + pcre_only = is_pcre_only(cur_rule['symbol']), }) cur_rule['re']:set_limit(match_limit) cur_rule['re']:set_max_hits(1) @@ -732,6 +748,7 @@ local function process_sa_conf(f) rspamd_config:register_regexp({ re = cur_rule['re'], type = 'url', + pcre_only = is_pcre_only(cur_rule['symbol']), }) cur_rule['re']:set_limit(match_limit) cur_rule['re']:set_max_hits(1) @@ -774,35 +791,6 @@ local function process_sa_conf(f) end end -if type(section) == "table" then - for k,fn in pairs(section) do - if k == 'alpha' and type(fn) == 'number' then - meta_score_alpha = fn - elseif k == 'match_limit' and type(fn) == 'number' then - match_limit = fn - else - if type(fn) == 'table' then - for k,elt in ipairs(fn) do - f = io.open(elt, "r") - if f then - process_sa_conf(f) - else - rspamd_logger.errx(rspamd_config, "cannot open %s", elt) - end - end - else - -- assume string - f = io.open(fn, "r") - if f then - process_sa_conf(f) - else - rspamd_logger.errx(rspamd_config, "cannot open %s", fn) - end - end - end - end -end - -- Now check all valid rules and add the according rspamd rules local function calculate_score(sym, rule) @@ -910,63 +898,101 @@ local function apply_replacements(str) return false,str end --- Replace rule tags -local ntags = {} -local function rec_replace_tags(tag, tagv) - if ntags[tag] then return ntags[tag] end - _.each(function(n, t) - if n ~= tag then - local s,matches = string.gsub(tagv, string.format("<%s>", n), t) - if matches > 0 then - ntags[tag] = rec_replace_tags(tag, s) - end +local function parse_atom(str) + local atom = table.concat(_.totable(_.take_while(function(c) + if string.find(', \t()><+!|&\n', c) then + return false end - end, replace['tags']) + return true + end, _.iter(str))), '') - if not ntags[tag] then ntags[tag] = tagv end - return ntags[tag] + return atom end -_.each(function(n, t) - rec_replace_tags(n, t) -end, replace['tags']) -_.each(function(n, t) - replace['tags'][n] = t -end, ntags) - -_.each(function(r) - local rule = rules[r] - - if rule['re_expr'] and rule['re'] then - local res,nexpr = apply_replacements(rule['re_expr']) - if res then - local nre = rspamd_regexp.create(nexpr) - if not nre then - rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %1', r) - rule['re'] = nil - else - local old_max_hits = rule['re']:get_max_hits() - rspamd_logger.debugx(rspamd_config, 'replace %1 -> %2', r, nexpr) - rspamd_config:replace_regexp({ - old_re = rule['re'], - new_re = nre - }) - rule['re'] = nre - rule['re_expr'] = nexpr - nre:set_limit(match_limit) - nre:set_max_hits(old_max_hits) - end + +local function process_atom(atom, task) + local atom_cb = atoms[atom] + if atom_cb then + local res = atom_cb(task) + + if not res then + rspamd_logger.debugx(task, 'atom: %1, NULL result', atom) + elseif res > 0 then + rspamd_logger.debugx(task, 'atom: %1, result: %2', atom, res) + end + return res + elseif external_deps[atom] then + local res = 0 + if task:get_symbol(atom) then + res = 1 end + rspamd_logger.debugx(task, 'external atom: %1, result: %2', atom, res) + + return res + else + rspamd_logger.debugx(task, 'Cannot find atom ' .. atom) end -end, replace['rules']) + return 0 +end -_.each(function(key, score) - if rules[key] then - rules[key]['score'] = score +local function post_process() + -- Replace rule tags + local ntags = {} + local function rec_replace_tags(tag, tagv) + if ntags[tag] then return ntags[tag] end + _.each(function(n, t) + if n ~= tag then + local s, matches = string.gsub(tagv, string.format("<%s>", n), t) + if matches > 0 then + ntags[tag] = rec_replace_tags(tag, s) + end + end + end, replace['tags']) + + if not ntags[tag] then ntags[tag] = tagv end + return ntags[tag] end -end, scores) --- Header rules -_.each(function(k, r) + _.each(function(n, t) + rec_replace_tags(n, t) + end, replace['tags']) + _.each(function(n, t) + replace['tags'][n] = t + end, ntags) + + _.each(function(r) + local rule = rules[r] + + if rule['re_expr'] and rule['re'] then + local res, nexpr = apply_replacements(rule['re_expr']) + if res then + local nre = rspamd_regexp.create(nexpr) + if not nre then + rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %1', r) + rule['re'] = nil + else + local old_max_hits = rule['re']:get_max_hits() + rspamd_logger.debugx(rspamd_config, 'replace %1 -> %2', r, nexpr) + rspamd_config:replace_regexp({ + old_re = rule['re'], + new_re = nre + }) + rule['re'] = nre + rule['re_expr'] = nexpr + nre:set_limit(match_limit) + nre:set_max_hits(old_max_hits) + end + end + end + end, replace['rules']) + + _.each(function(key, score) + if rules[key] then + rules[key]['score'] = score + end + end, scores) + + -- Header rules + _.each(function(k, r) local f = function(task) local raw = false local check = {} @@ -981,7 +1007,7 @@ _.each(function(k, r) if not r['re'] then rspamd_logger.errx(task, 're is missing for rule %s (%s header)', k, - h['header']) + h['header']) return 0 end @@ -1004,52 +1030,52 @@ _.each(function(k, r) local headers = {} local hname = h['header'] - local hdr - if h['mime'] then - local parts = task:get_parts() - for i,p in ipairs(parts) do - local m_hdr = p:get_header_full(hname, h['strong']) + local hdr + if h['mime'] then + local parts = task:get_parts() + for i, p in ipairs(parts) do + local m_hdr = p:get_header_full(hname, h['strong']) - if m_hdr then - if not hdr then - hdr = {} - end - for k,mh in ipairs(m_hdr) do - table.insert(hdr, mh) - end + if m_hdr then + if not hdr then + hdr = {} + end + for k, mh in ipairs(m_hdr) do + table.insert(hdr, mh) end end - else - hdr = task:get_header_full(hname, h['strong']) end + else + hdr = task:get_header_full(hname, h['strong']) + end - if hdr then - for n, rh in ipairs(hdr) do - -- Subject for optimization - local str - if h['raw'] then - str = rh['value'] - raw = true - else - str = rh['decoded'] - end - if not str then return 0 end + if hdr then + for n, rh in ipairs(hdr) do + -- Subject for optimization + local str + if h['raw'] then + str = rh['value'] + raw = true + else + str = rh['decoded'] + end + if not str then return 0 end - if h['function'] then - str = h['function'](str) - end + if h['function'] then + str = h['function'](str) + end - if type(str) == 'string' then - table.insert(check, str) - else - for ii,c in ipairs(str) do - table.insert(check, c) - end + if type(str) == 'string' then + table.insert(check, str) + else + for ii, c in ipairs(str) do + table.insert(check, c) end end - elseif r['unset'] then - table.insert(check, r['unset']) end + elseif r['unset'] then + table.insert(check, r['unset']) + end end, r['header']) if #check == 0 then @@ -1057,7 +1083,7 @@ _.each(function(k, r) return 0 end - for i,c in ipairs(check) do + for i, c in ipairs(check) do local match = sa_regexp_match(c, r['re'], raw, r) if (match and not r['not']) or (not match and r['not']) then return match @@ -1075,13 +1101,13 @@ _.each(function(k, r) --rspamd_config:register_symbol(k, calculate_score(k), f) atoms[k] = f end, - _.filter(function(k, r) + _.filter(function(k, r) return r['type'] == 'header' and r['header'] end, - rules)) + rules)) --- Custom function rules -_.each(function(k, r) + -- Custom function rules + _.each(function(k, r) local f = function(task) local res = r['function'](task) if res and res > 0 then @@ -1098,13 +1124,13 @@ _.each(function(k, r) --rspamd_config:register_symbol(k, calculate_score(k), f) atoms[k] = f end, - _.filter(function(k, r) + _.filter(function(k, r) return r['type'] == 'function' and r['function'] end, - rules)) + rules)) --- Parts rules -_.each(function(k, r) + -- Parts rules + _.each(function(k, r) local f = function(task) if not r['re'] then rspamd_logger.errx(task, 're is missing for rule %s', k) @@ -1128,13 +1154,13 @@ _.each(function(k, r) --rspamd_config:register_symbol(k, calculate_score(k), f) atoms[k] = f end, - _.filter(function(k, r) + _.filter(function(k, r) return r['type'] == 'part' end, - rules)) + rules)) --- Raw body rules -_.each(function(k, r) + -- Raw body rules + _.each(function(k, r) local f = function(task) if not r['re'] then rspamd_logger.errx(task, 're is missing for rule %s', k) @@ -1153,15 +1179,15 @@ _.each(function(k, r) end end --rspamd_config:register_symbol(k, calculate_score(k), f) - atoms[k] = f + atoms[k] = f end, - _.filter(function(k, r) + _.filter(function(k, r) return r['type'] == 'message' end, - rules)) + rules)) --- URL rules -_.each(function(k, r) + -- URL rules + _.each(function(k, r) local f = function(task) if not r['re'] then rspamd_logger.errx(task, 're is missing for rule %s', k) @@ -1180,106 +1206,109 @@ _.each(function(k, r) end end --rspamd_config:register_symbol(k, calculate_score(k), f) - atoms[k] = f + atoms[k] = f end, - _.filter(function(k, r) + _.filter(function(k, r) return r['type'] == 'uri' end, - rules)) - - -local sa_mempool = rspamd_mempool.create() - -local function parse_atom(str) - local atom = table.concat(_.totable(_.take_while(function(c) - if string.find(', \t()><+!|&\n', c) then - return false - end - return true - end, _.iter(str))), '') - - return atom -end - -local function process_atom(atom, task) - local atom_cb = atoms[atom] - if atom_cb then - local res = atom_cb(task) - - if not res then - rspamd_logger.debugx(task, 'atom: %1, NULL result', atom) - elseif res > 0 then - rspamd_logger.debugx(task, 'atom: %1, result: %2', atom, res) - end - return res - elseif external_deps[atom] then - local res = 0 - if task:get_symbol(atom) then - res = 1 - end - rspamd_logger.debugx(task, 'external atom: %1, result: %2', atom, res) - - return res - else - rspamd_logger.debugx(task, 'Cannot find atom ' .. atom) - end - return 0 -end + rules)) + -- Meta rules + _.each(function(k, r) + local expression = nil + -- Meta function callback + local meta_cb = function(task) + local res = 0 + if expression then + res = expression:process(task) + end + if res > 0 then + task:insert_result(k, res) + end --- Meta rules -_.each(function(k, r) - local expression = nil - -- Meta function callback - local meta_cb = function(task) - local res = 0 - if expression then - res = expression:process(task) + return res end - if res > 0 then - task:insert_result(k, res) + expression = rspamd_expression.create(r['meta'], + {parse_atom, process_atom}, sa_mempool) + if not expression then + rspamd_logger.errx(rspamd_config, 'Cannot parse expression ' .. r['meta']) + else + if r['score'] then + rspamd_config:set_metric_symbol(k, r['score'], r['description']) + end + rspamd_config:register_symbol(k, calculate_score(k, r), meta_cb) + r['expression'] = expression + if not atoms[k] then + atoms[k] = meta_cb + end end + end, + _.filter(function(k, r) + return r['type'] == 'meta' + end, + rules)) - return res - end - expression = rspamd_expression.create(r['meta'], - {parse_atom, process_atom}, sa_mempool) - if not expression then - rspamd_logger.errx(rspamd_config, 'Cannot parse expression ' .. r['meta']) - else - if r['score'] then - rspamd_config:set_metric_symbol(k, r['score'], r['description']) - end - rspamd_config:register_symbol(k, calculate_score(k, r), meta_cb) - r['expression'] = expression - if not atoms[k] then - atoms[k] = meta_cb + -- Check meta rules for foreign symbols and register dependencies + _.each(function(k, r) + if r['expression'] then + local expr_atoms = r['expression']:atoms() + + for i,a in ipairs(expr_atoms) do + if not atoms[a] then + rspamd_logger.debugx('atom %1 is foreign for SA plugin, register dependency for %2 on %3', + a, k, a); + rspamd_config:register_dependency(k, a) + + if not external_deps[a] then + external_deps[a] = 1 + end + end + end end - end - end, - _.filter(function(k, r) + end, + _.filter(function(k, r) return r['type'] == 'meta' end, - rules)) + rules)) --- Check meta rules for foreign symbols and register dependencies -_.each(function(k, r) - if r['expression'] then - local expr_atoms = r['expression']:atoms() +end - for i,a in ipairs(expr_atoms) do - if not atoms[a] then - rspamd_logger.debugx('atom %1 is foreign for SA plugin, register dependency for %2 on %3', - a, k, a); - rspamd_config:register_dependency(k, a) +local has_rules = false - if not external_deps[a] then - external_deps[a] = 1 +if type(section) == "table" then + for k, fn in pairs(section) do + if k == 'alpha' and type(fn) == 'number' then + meta_score_alpha = fn + elseif k == 'match_limit' and type(fn) == 'number' then + match_limit = fn + elseif k == 'pcre_only' and type(fn) == 'table' then + for i,s in ipairs(fn) do + pcre_only_regexps[s] = 1 + end + else + if type(fn) == 'table' then + for k, elt in ipairs(fn) do + f = io.open(elt, "r") + if f then + process_sa_conf(f) + has_rules = true + else + rspamd_logger.errx(rspamd_config, "cannot open %s", elt) end end + else + -- assume string + f = io.open(fn, "r") + if f then + process_sa_conf(f) + has_rules = true + else + rspamd_logger.errx(rspamd_config, "cannot open %s", fn) + end end end - end, - _.filter(function(k, r) - return r['type'] == 'meta' - end, - rules)) + end +end + +if has_rules then + post_process() +end |