diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-02-24 20:14:38 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-02-24 20:14:38 +0000 |
commit | 8fcd9548181172bf53992ee1039c8ba550727dd1 (patch) | |
tree | 3178d1d930aa7ccb4cb738d5a78e06b6f92287b2 /lualib | |
parent | 1629eb5614c82e4ec3b1d480ee74d01eac0b66fe (diff) | |
download | rspamd-8fcd9548181172bf53992ee1039c8ba550727dd1.tar.gz rspamd-8fcd9548181172bf53992ee1039c8ba550727dd1.zip |
[Project] Fix urls composition rules, add unit tests
Diffstat (limited to 'lualib')
-rw-r--r-- | lualib/lua_urls_compose.lua | 121 |
1 files changed, 86 insertions, 35 deletions
diff --git a/lualib/lua_urls_compose.lua b/lualib/lua_urls_compose.lua index 3677b871b..d63dc2428 100644 --- a/lualib/lua_urls_compose.lua +++ b/lualib/lua_urls_compose.lua @@ -26,6 +26,7 @@ local rspamd_util = require "rspamd_util" local bit = require "bit" local rspamd_trie = require "rspamd_trie" local fun = require "fun" +local rspamd_regexp = require "rspamd_regexp" local maps_cache = {} @@ -35,11 +36,11 @@ local function process_url(self, log_obj, url_tld, url_host) local tld_elt = self.tlds[url_tld] if tld_elt then - lua_util.debugm(N, log_obj, 'found compose rule for %s (%s host)', + lua_util.debugm(N, log_obj, 'found compose tld for %s (host = %s)', url_tld, url_host) for _,excl in ipairs(tld_elt.except_rules) do - local matched,ret = excl[2](url_host) + local matched,ret = excl[2](url_tld, url_host) if matched then lua_util.debugm(N, log_obj, 'found compose exclusion for %s (%s) -> %s', url_host, excl[1], ret) @@ -51,27 +52,41 @@ local function process_url(self, log_obj, url_tld, url_host) if tld_elt.multipattern_compose_rules then local matches = tld_elt.multipattern_compose_rules:match(url_host) - if #matches == 1 then - local lua_pat = tld_elt.compose_rules[matches[1]] + if matches then + local lua_pat_idx = math.huge - local matched,ret = lua_pat[2](url_host) + for m,_ in pairs(matches) do + if m < lua_pat_idx then + lua_pat_idx = m + end + end - if not matched then - lua_util.debugm(N, log_obj, 'NOT found compose inclusion for %s (%s) -> %s', - url_host, lua_pat[1], url_tld) + if #tld_elt.compose_rules >= lua_pat_idx then + local lua_pat = tld_elt.compose_rules[lua_pat_idx] + local matched,ret = lua_pat[2](url_tld, url_host) - return url_tld + if not matched then + lua_util.debugm(N, log_obj, 'NOT found compose inclusion for %s (%s) -> %s', + url_host, lua_pat[1], url_tld) + + return url_tld + else + lua_util.debugm(N, log_obj, 'found compose inclusion for %s (%s) -> %s', + url_host, lua_pat[1], ret) + + return ret + end else - lua_util.debugm(N, log_obj, 'found compose inclusion for %s (%s) -> %s', - url_host, lua_pat[1], ret) + lua_util.debugm(N, log_obj, 'NOT found compose inclusion for %s (%s) -> %s', + url_host, lua_pat_idx, url_tld) - return ret + return url_tld end end else -- Match one by one for _,lua_pat in ipairs(tld_elt.compose_rules) do - local matched,ret = lua_pat[2](url_host) + local matched,ret = lua_pat[2](url_tld, url_host) if matched then lua_util.debugm(N, log_obj, 'found compose inclusion for %s (%s) -> %s', url_host, lua_pat[1], ret) @@ -83,6 +98,9 @@ local function process_url(self, log_obj, url_tld, url_host) lua_util.debugm(N, log_obj, 'not found compose inclusion for %s in %s -> %s', url_host, url_tld, url_tld) + else + lua_util.debugm(N, log_obj, 'not found compose tld for %s in %s -> %s', + url_host, url_tld, url_tld) end return url_tld @@ -92,22 +110,25 @@ local function tld_pattern_transform(tld_pat) -- Convert tld like pattern to a lua match pattern -- blah -> %.blah -- *.blah -> .*%.blah - - if tld_pat:sub(1, 1) == '*' then - tld_pat = tld_pat:gsub('%*', '.*%%') .. '$' + local ret + if tld_pat:sub(1, 2) == '*.' then + ret = string.format('^((?:[^.]+\\.)*%s)$', tld_pat:sub(3)) else - tld_pat = string.format('([^.]*%%.%s)$', tld_pat) + ret = string.format('(?:^|\\.)((?:[^.]+\\.)?%s)$', tld_pat) end - return tld_pat + lua_util.debugm(N, nil, 'added pattern %s -> %s', + tld_pat, ret) + + return ret end local function include_elt_gen(pat) - pat = tld_pattern_transform(pat) + pat = rspamd_regexp.create(tld_pattern_transform(pat), 'i') return function(_, host) - local match = pat:match(host) - if match then - return true,match + local matches = pat:search(host, false, true) + if matches then + return true,matches[1][2] end return false @@ -115,9 +136,9 @@ local function include_elt_gen(pat) end local function exclude_elt_gen(pat) - pat = tld_pattern_transform(pat) + pat = rspamd_regexp.create(tld_pattern_transform(pat)) return function(tld, host) - if host:match(pat) then + if pat:search(host) then return true,tld end @@ -143,7 +164,7 @@ local function compose_map_cb(self, map_text) local function process_map_line(l) -- Skip empty lines and comments if #l == 0 then return end - l = comments_strip_grammar(l) + l = comments_strip_grammar:match(l) if not l or #l == 0 then return end -- Get TLD @@ -158,38 +179,44 @@ local function compose_map_cb(self, map_text) except_rules = {}, multipattern_compose_rules = nil } + + lua_util.debugm(N, rspamd_config, 'processed new tld rule for %s', tld) self.tlds[tld] = tld_elt end + process_tld_rule(tld_elt, l) else lua_util.debugm(N, rspamd_config, 'cannot read tld from compose map line: %s', l) end end - for _,line in map_text:lines() do + for line in map_text:lines() do process_map_line(line) end - local multipattern_threshold = 32 + local multipattern_threshold = 1 for tld,tld_elt in pairs(self.tlds) do + -- Sort patterns to have longest labels before shortest ones, + -- so we can ensure that they match before + table.sort(tld_elt.compose_rules, function(e1, e2) + local _,ndots1 = string.gsub(e1[1], '(%.)', '') + local _,ndots2 = string.gsub(e2[1], '(%.)', '') + + return ndots1 > ndots2 + end) if rspamd_trie.has_hyperscan() and #tld_elt.compose_rules >= multipattern_threshold then lua_util.debugm(N, rspamd_config, 'tld %s has %s rules, apply multipattern', tld, #tld_elt.compose_rules) local flags = bit.bor(rspamd_trie.flags.re, rspamd_trie.flags.dot_all, rspamd_trie.flags.no_start, - rspamd_trie.flags.single_match) + rspamd_trie.flags.icase) -- We now convert our internal patterns to multipattern patterns local mp_table = fun.totable(fun.map(function(pat_elt) - local pat_str = pat_elt[1] - if pat_str:sub(1, 1) == '*' then - return '\\.?.*\\.?' .. pat_str .. '$' - else - return '\\.?' .. pat_str .. '$' - end - end)) + return tld_pattern_transform(pat_elt[1]) + end, tld_elt.compose_rules)) tld_elt.multipattern_compose_rules = rspamd_trie.create(mp_table, flags) end end @@ -226,4 +253,28 @@ exports.add_composition_map = function(cfg, map_obj) return map end +exports.inject_composition_rules = function(cfg, rules) + local hash_key = rules + local rspamd_text = require "rspamd_text" + if type(rules) == 'table' then + hash_key = lua_util.table_digest(rules) + end + + local map = maps_cache[hash_key] + + if not map then + local ret = { + process_url = process_url, + hash = hash_key, + tlds = {}, + } + + compose_map_cb(ret, rspamd_text.fromtable(rules, '\n')) + maps_cache[hash_key] = ret + map = ret + end + + return map +end + return exports
\ No newline at end of file |