aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-12-10 16:28:39 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-12-10 16:28:39 +0000
commit8e57fc2c1d7b3d5f7a9839f140e3502ee69e3d61 (patch)
tree6978e443012972b6e16d1539dbe1f71fc551e774 /src/plugins
parent74059e7f272613b677c90df1a428047d6bb0182d (diff)
downloadrspamd-8e57fc2c1d7b3d5f7a9839f140e3502ee69e3d61.tar.gz
rspamd-8e57fc2c1d7b3d5f7a9839f140e3502ee69e3d61.zip
Allow to exclude some regexps from hyperscan
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/lua/spamassassin.lua475
1 files changed, 252 insertions, 223 deletions
diff --git a/src/plugins/lua/spamassassin.lua b/src/plugins/lua/spamassassin.lua
index d714ecc5b..f0243acfa 100644
--- a/src/plugins/lua/spamassassin.lua
+++ b/src/plugins/lua/spamassassin.lua
@@ -55,7 +55,9 @@ local metas = {}
local scores = {}
local external_deps = {}
local freemail_domains = {}
+local pcre_only_regexps = {}
local freemail_trie
+local sa_mempool = rspamd_mempool.create()
local replace = {
tags = {},
pre = {},
@@ -93,6 +95,13 @@ local function trim(s)
return s:match "^%s*(.-)%s*$"
end
+local function is_pcre_only(name)
+ if pcre_only_regexps[name] then
+ return true
+ end
+ return false
+end
+
local function handle_header_def(hline, cur_rule)
--Now check for modifiers inside header's name
local hdrs = split(hline, '[^|]+')
@@ -112,7 +121,8 @@ local function handle_header_def(hline, cur_rule)
-- Rule to match all headers
rspamd_config:register_regexp({
re = re,
- type = 'allheader'
+ type = 'allheader',
+ pcre_only = is_pcre_only(cur_rule['symbol']),
})
cur_rule['function'] = function(task)
if not re then
@@ -609,19 +619,23 @@ local function process_sa_conf(f)
rspamd_config:register_regexp({
re = cur_rule['re'],
type = 'header',
- header = h})
+ header = h,
+ pcre_only = is_pcre_only(cur_rule['symbol']),
+ })
else
if h['raw'] then
rspamd_config:register_regexp({
re = cur_rule['re'],
type = 'rawheader',
- header = h['header']
+ header = h['header'],
+ pcre_only = is_pcre_only(cur_rule['symbol']),
})
else
rspamd_config:register_regexp({
re = cur_rule['re'],
type = 'header',
- header = h['header']
+ header = h['header'],
+ pcre_only = is_pcre_only(cur_rule['symbol']),
})
end
end
@@ -662,6 +676,7 @@ local function process_sa_conf(f)
rspamd_config:register_regexp({
re = cur_rule['re'],
type = 'rawmime',
+ pcre_only = is_pcre_only(cur_rule['symbol']),
})
valid_rule = true
cur_rule['re']:set_limit(match_limit)
@@ -700,6 +715,7 @@ local function process_sa_conf(f)
rspamd_config:register_regexp({
re = cur_rule['re'],
type = 'body',
+ pcre_only = is_pcre_only(cur_rule['symbol']),
})
cur_rule['re']:set_limit(match_limit)
cur_rule['re']:set_max_hits(1)
@@ -732,6 +748,7 @@ local function process_sa_conf(f)
rspamd_config:register_regexp({
re = cur_rule['re'],
type = 'url',
+ pcre_only = is_pcre_only(cur_rule['symbol']),
})
cur_rule['re']:set_limit(match_limit)
cur_rule['re']:set_max_hits(1)
@@ -774,35 +791,6 @@ local function process_sa_conf(f)
end
end
-if type(section) == "table" then
- for k,fn in pairs(section) do
- if k == 'alpha' and type(fn) == 'number' then
- meta_score_alpha = fn
- elseif k == 'match_limit' and type(fn) == 'number' then
- match_limit = fn
- else
- if type(fn) == 'table' then
- for k,elt in ipairs(fn) do
- f = io.open(elt, "r")
- if f then
- process_sa_conf(f)
- else
- rspamd_logger.errx(rspamd_config, "cannot open %s", elt)
- end
- end
- else
- -- assume string
- f = io.open(fn, "r")
- if f then
- process_sa_conf(f)
- else
- rspamd_logger.errx(rspamd_config, "cannot open %s", fn)
- end
- end
- end
- end
-end
-
-- Now check all valid rules and add the according rspamd rules
local function calculate_score(sym, rule)
@@ -910,63 +898,101 @@ local function apply_replacements(str)
return false,str
end
--- Replace rule tags
-local ntags = {}
-local function rec_replace_tags(tag, tagv)
- if ntags[tag] then return ntags[tag] end
- _.each(function(n, t)
- if n ~= tag then
- local s,matches = string.gsub(tagv, string.format("<%s>", n), t)
- if matches > 0 then
- ntags[tag] = rec_replace_tags(tag, s)
- end
+local function parse_atom(str)
+ local atom = table.concat(_.totable(_.take_while(function(c)
+ if string.find(', \t()><+!|&\n', c) then
+ return false
end
- end, replace['tags'])
+ return true
+ end, _.iter(str))), '')
- if not ntags[tag] then ntags[tag] = tagv end
- return ntags[tag]
+ return atom
end
-_.each(function(n, t)
- rec_replace_tags(n, t)
-end, replace['tags'])
-_.each(function(n, t)
- replace['tags'][n] = t
-end, ntags)
-
-_.each(function(r)
- local rule = rules[r]
-
- if rule['re_expr'] and rule['re'] then
- local res,nexpr = apply_replacements(rule['re_expr'])
- if res then
- local nre = rspamd_regexp.create(nexpr)
- if not nre then
- rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %1', r)
- rule['re'] = nil
- else
- local old_max_hits = rule['re']:get_max_hits()
- rspamd_logger.debugx(rspamd_config, 'replace %1 -> %2', r, nexpr)
- rspamd_config:replace_regexp({
- old_re = rule['re'],
- new_re = nre
- })
- rule['re'] = nre
- rule['re_expr'] = nexpr
- nre:set_limit(match_limit)
- nre:set_max_hits(old_max_hits)
- end
+
+local function process_atom(atom, task)
+ local atom_cb = atoms[atom]
+ if atom_cb then
+ local res = atom_cb(task)
+
+ if not res then
+ rspamd_logger.debugx(task, 'atom: %1, NULL result', atom)
+ elseif res > 0 then
+ rspamd_logger.debugx(task, 'atom: %1, result: %2', atom, res)
+ end
+ return res
+ elseif external_deps[atom] then
+ local res = 0
+ if task:get_symbol(atom) then
+ res = 1
end
+ rspamd_logger.debugx(task, 'external atom: %1, result: %2', atom, res)
+
+ return res
+ else
+ rspamd_logger.debugx(task, 'Cannot find atom ' .. atom)
end
-end, replace['rules'])
+ return 0
+end
-_.each(function(key, score)
- if rules[key] then
- rules[key]['score'] = score
+local function post_process()
+ -- Replace rule tags
+ local ntags = {}
+ local function rec_replace_tags(tag, tagv)
+ if ntags[tag] then return ntags[tag] end
+ _.each(function(n, t)
+ if n ~= tag then
+ local s, matches = string.gsub(tagv, string.format("<%s>", n), t)
+ if matches > 0 then
+ ntags[tag] = rec_replace_tags(tag, s)
+ end
+ end
+ end, replace['tags'])
+
+ if not ntags[tag] then ntags[tag] = tagv end
+ return ntags[tag]
end
-end, scores)
--- Header rules
-_.each(function(k, r)
+ _.each(function(n, t)
+ rec_replace_tags(n, t)
+ end, replace['tags'])
+ _.each(function(n, t)
+ replace['tags'][n] = t
+ end, ntags)
+
+ _.each(function(r)
+ local rule = rules[r]
+
+ if rule['re_expr'] and rule['re'] then
+ local res, nexpr = apply_replacements(rule['re_expr'])
+ if res then
+ local nre = rspamd_regexp.create(nexpr)
+ if not nre then
+ rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %1', r)
+ rule['re'] = nil
+ else
+ local old_max_hits = rule['re']:get_max_hits()
+ rspamd_logger.debugx(rspamd_config, 'replace %1 -> %2', r, nexpr)
+ rspamd_config:replace_regexp({
+ old_re = rule['re'],
+ new_re = nre
+ })
+ rule['re'] = nre
+ rule['re_expr'] = nexpr
+ nre:set_limit(match_limit)
+ nre:set_max_hits(old_max_hits)
+ end
+ end
+ end
+ end, replace['rules'])
+
+ _.each(function(key, score)
+ if rules[key] then
+ rules[key]['score'] = score
+ end
+ end, scores)
+
+ -- Header rules
+ _.each(function(k, r)
local f = function(task)
local raw = false
local check = {}
@@ -981,7 +1007,7 @@ _.each(function(k, r)
if not r['re'] then
rspamd_logger.errx(task, 're is missing for rule %s (%s header)', k,
- h['header'])
+ h['header'])
return 0
end
@@ -1004,52 +1030,52 @@ _.each(function(k, r)
local headers = {}
local hname = h['header']
- local hdr
- if h['mime'] then
- local parts = task:get_parts()
- for i,p in ipairs(parts) do
- local m_hdr = p:get_header_full(hname, h['strong'])
+ local hdr
+ if h['mime'] then
+ local parts = task:get_parts()
+ for i, p in ipairs(parts) do
+ local m_hdr = p:get_header_full(hname, h['strong'])
- if m_hdr then
- if not hdr then
- hdr = {}
- end
- for k,mh in ipairs(m_hdr) do
- table.insert(hdr, mh)
- end
+ if m_hdr then
+ if not hdr then
+ hdr = {}
+ end
+ for k, mh in ipairs(m_hdr) do
+ table.insert(hdr, mh)
end
end
- else
- hdr = task:get_header_full(hname, h['strong'])
end
+ else
+ hdr = task:get_header_full(hname, h['strong'])
+ end
- if hdr then
- for n, rh in ipairs(hdr) do
- -- Subject for optimization
- local str
- if h['raw'] then
- str = rh['value']
- raw = true
- else
- str = rh['decoded']
- end
- if not str then return 0 end
+ if hdr then
+ for n, rh in ipairs(hdr) do
+ -- Subject for optimization
+ local str
+ if h['raw'] then
+ str = rh['value']
+ raw = true
+ else
+ str = rh['decoded']
+ end
+ if not str then return 0 end
- if h['function'] then
- str = h['function'](str)
- end
+ if h['function'] then
+ str = h['function'](str)
+ end
- if type(str) == 'string' then
- table.insert(check, str)
- else
- for ii,c in ipairs(str) do
- table.insert(check, c)
- end
+ if type(str) == 'string' then
+ table.insert(check, str)
+ else
+ for ii, c in ipairs(str) do
+ table.insert(check, c)
end
end
- elseif r['unset'] then
- table.insert(check, r['unset'])
end
+ elseif r['unset'] then
+ table.insert(check, r['unset'])
+ end
end, r['header'])
if #check == 0 then
@@ -1057,7 +1083,7 @@ _.each(function(k, r)
return 0
end
- for i,c in ipairs(check) do
+ for i, c in ipairs(check) do
local match = sa_regexp_match(c, r['re'], raw, r)
if (match and not r['not']) or (not match and r['not']) then
return match
@@ -1075,13 +1101,13 @@ _.each(function(k, r)
--rspamd_config:register_symbol(k, calculate_score(k), f)
atoms[k] = f
end,
- _.filter(function(k, r)
+ _.filter(function(k, r)
return r['type'] == 'header' and r['header']
end,
- rules))
+ rules))
--- Custom function rules
-_.each(function(k, r)
+ -- Custom function rules
+ _.each(function(k, r)
local f = function(task)
local res = r['function'](task)
if res and res > 0 then
@@ -1098,13 +1124,13 @@ _.each(function(k, r)
--rspamd_config:register_symbol(k, calculate_score(k), f)
atoms[k] = f
end,
- _.filter(function(k, r)
+ _.filter(function(k, r)
return r['type'] == 'function' and r['function']
end,
- rules))
+ rules))
--- Parts rules
-_.each(function(k, r)
+ -- Parts rules
+ _.each(function(k, r)
local f = function(task)
if not r['re'] then
rspamd_logger.errx(task, 're is missing for rule %s', k)
@@ -1128,13 +1154,13 @@ _.each(function(k, r)
--rspamd_config:register_symbol(k, calculate_score(k), f)
atoms[k] = f
end,
- _.filter(function(k, r)
+ _.filter(function(k, r)
return r['type'] == 'part'
end,
- rules))
+ rules))
--- Raw body rules
-_.each(function(k, r)
+ -- Raw body rules
+ _.each(function(k, r)
local f = function(task)
if not r['re'] then
rspamd_logger.errx(task, 're is missing for rule %s', k)
@@ -1153,15 +1179,15 @@ _.each(function(k, r)
end
end
--rspamd_config:register_symbol(k, calculate_score(k), f)
- atoms[k] = f
+ atoms[k] = f
end,
- _.filter(function(k, r)
+ _.filter(function(k, r)
return r['type'] == 'message'
end,
- rules))
+ rules))
--- URL rules
-_.each(function(k, r)
+ -- URL rules
+ _.each(function(k, r)
local f = function(task)
if not r['re'] then
rspamd_logger.errx(task, 're is missing for rule %s', k)
@@ -1180,106 +1206,109 @@ _.each(function(k, r)
end
end
--rspamd_config:register_symbol(k, calculate_score(k), f)
- atoms[k] = f
+ atoms[k] = f
end,
- _.filter(function(k, r)
+ _.filter(function(k, r)
return r['type'] == 'uri'
end,
- rules))
-
-
-local sa_mempool = rspamd_mempool.create()
-
-local function parse_atom(str)
- local atom = table.concat(_.totable(_.take_while(function(c)
- if string.find(', \t()><+!|&\n', c) then
- return false
- end
- return true
- end, _.iter(str))), '')
-
- return atom
-end
-
-local function process_atom(atom, task)
- local atom_cb = atoms[atom]
- if atom_cb then
- local res = atom_cb(task)
-
- if not res then
- rspamd_logger.debugx(task, 'atom: %1, NULL result', atom)
- elseif res > 0 then
- rspamd_logger.debugx(task, 'atom: %1, result: %2', atom, res)
- end
- return res
- elseif external_deps[atom] then
- local res = 0
- if task:get_symbol(atom) then
- res = 1
- end
- rspamd_logger.debugx(task, 'external atom: %1, result: %2', atom, res)
-
- return res
- else
- rspamd_logger.debugx(task, 'Cannot find atom ' .. atom)
- end
- return 0
-end
+ rules))
+ -- Meta rules
+ _.each(function(k, r)
+ local expression = nil
+ -- Meta function callback
+ local meta_cb = function(task)
+ local res = 0
+ if expression then
+ res = expression:process(task)
+ end
+ if res > 0 then
+ task:insert_result(k, res)
+ end
--- Meta rules
-_.each(function(k, r)
- local expression = nil
- -- Meta function callback
- local meta_cb = function(task)
- local res = 0
- if expression then
- res = expression:process(task)
+ return res
end
- if res > 0 then
- task:insert_result(k, res)
+ expression = rspamd_expression.create(r['meta'],
+ {parse_atom, process_atom}, sa_mempool)
+ if not expression then
+ rspamd_logger.errx(rspamd_config, 'Cannot parse expression ' .. r['meta'])
+ else
+ if r['score'] then
+ rspamd_config:set_metric_symbol(k, r['score'], r['description'])
+ end
+ rspamd_config:register_symbol(k, calculate_score(k, r), meta_cb)
+ r['expression'] = expression
+ if not atoms[k] then
+ atoms[k] = meta_cb
+ end
end
+ end,
+ _.filter(function(k, r)
+ return r['type'] == 'meta'
+ end,
+ rules))
- return res
- end
- expression = rspamd_expression.create(r['meta'],
- {parse_atom, process_atom}, sa_mempool)
- if not expression then
- rspamd_logger.errx(rspamd_config, 'Cannot parse expression ' .. r['meta'])
- else
- if r['score'] then
- rspamd_config:set_metric_symbol(k, r['score'], r['description'])
- end
- rspamd_config:register_symbol(k, calculate_score(k, r), meta_cb)
- r['expression'] = expression
- if not atoms[k] then
- atoms[k] = meta_cb
+ -- Check meta rules for foreign symbols and register dependencies
+ _.each(function(k, r)
+ if r['expression'] then
+ local expr_atoms = r['expression']:atoms()
+
+ for i,a in ipairs(expr_atoms) do
+ if not atoms[a] then
+ rspamd_logger.debugx('atom %1 is foreign for SA plugin, register dependency for %2 on %3',
+ a, k, a);
+ rspamd_config:register_dependency(k, a)
+
+ if not external_deps[a] then
+ external_deps[a] = 1
+ end
+ end
+ end
end
- end
- end,
- _.filter(function(k, r)
+ end,
+ _.filter(function(k, r)
return r['type'] == 'meta'
end,
- rules))
+ rules))
--- Check meta rules for foreign symbols and register dependencies
-_.each(function(k, r)
- if r['expression'] then
- local expr_atoms = r['expression']:atoms()
+end
- for i,a in ipairs(expr_atoms) do
- if not atoms[a] then
- rspamd_logger.debugx('atom %1 is foreign for SA plugin, register dependency for %2 on %3',
- a, k, a);
- rspamd_config:register_dependency(k, a)
+local has_rules = false
- if not external_deps[a] then
- external_deps[a] = 1
+if type(section) == "table" then
+ for k, fn in pairs(section) do
+ if k == 'alpha' and type(fn) == 'number' then
+ meta_score_alpha = fn
+ elseif k == 'match_limit' and type(fn) == 'number' then
+ match_limit = fn
+ elseif k == 'pcre_only' and type(fn) == 'table' then
+ for i,s in ipairs(fn) do
+ pcre_only_regexps[s] = 1
+ end
+ else
+ if type(fn) == 'table' then
+ for k, elt in ipairs(fn) do
+ f = io.open(elt, "r")
+ if f then
+ process_sa_conf(f)
+ has_rules = true
+ else
+ rspamd_logger.errx(rspamd_config, "cannot open %s", elt)
end
end
+ else
+ -- assume string
+ f = io.open(fn, "r")
+ if f then
+ process_sa_conf(f)
+ has_rules = true
+ else
+ rspamd_logger.errx(rspamd_config, "cannot open %s", fn)
+ end
end
end
- end,
- _.filter(function(k, r)
- return r['type'] == 'meta'
- end,
- rules))
+ end
+end
+
+if has_rules then
+ post_process()
+end