diff options
author | Mikhail Galanin <mgalanin@mimecast.com> | 2018-07-31 09:31:50 +0100 |
---|---|---|
committer | Mikhail Galanin <mgalanin@mimecast.com> | 2018-07-31 09:31:50 +0100 |
commit | 48b89787a51a119e727c50e368d51d68c8dc3da4 (patch) | |
tree | fc1ac12fa6f76092c060458764a673d9f7244a6b /lualib | |
parent | 8a0d5ee8dcb1b2a74faa536bff92800ef8569173 (diff) | |
download | rspamd-48b89787a51a119e727c50e368d51d68c8dc3da4.tar.gz rspamd-48b89787a51a119e727c50e368d51d68c8dc3da4.zip |
[Minor] Pass params via table instead of long arglist. Added esld_limit param to lua_util.extract_specific_urls()
Diffstat (limited to 'lualib')
-rw-r--r-- | lualib/lua_util.lua | 94 |
1 files changed, 68 insertions, 26 deletions
diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua index 520318fe6..c43286d8b 100644 --- a/lualib/lua_util.lua +++ b/lualib/lua_util.lua @@ -490,36 +490,71 @@ end exports.override_defaults = override_defaults --[[[ --- @function lua_util.extract_specific_urls(task, limit, [need_emails[, filter[, prefix]) +-- @function lua_util.extract_specific_urls(params) +-- params: { +- - task +- - limit <int> (default = 9999) +- - esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain) + works only if number of unique eSLD less than `limit` +- - need_emails <bool> (default = false) +- - filter <callback> (default = nil) +- - prefix <string> cache prefix (default = nil) +-- } -- Apply heuristic in extracting of urls from task, this function -- tries its best to extract specific number of urls from a task based on -- their characteristics --]] -exports.extract_specific_urls = function(task, lim, need_emails, filter, prefix) +-- exports.extract_specific_urls = function(params_or_task, limit, need_emails, filter, prefix) +exports.extract_specific_urls = function(params_or_task, limit, need_emails, filter, prefix) + local default_params = { + limit = 9999, + esld_limit = 9999, + need_emails = false, + filter = nil, + prefix = nil + } + + local params + if type(params_or_task) == 'table' and type(limit) == 'nil' then + params = params_or_task + else + -- Deprecated call + params = { + task = params_or_task, + limit = limit, + need_emails = need_emails, + filter = filter, + prefix = prefix + } + end + for k,v in pairs(default_params) do + if not params[k] then params[k] = default_params[k] end + end + + local cache_key - if prefix then - cache_key = prefix + if params.prefix then + cache_key = params.prefix else - cache_key = string.format('sp_urls_%d%s', lim, need_emails) + cache_key = string.format('sp_urls_%d%s', params.limit, params.need_emails) end - local cached = task:cache_get(cache_key) + local cached = params.task:cache_get(cache_key) if cached then return cached end - local urls = task:get_urls(need_emails) + local urls = params.task:get_urls(params.need_emails) if not urls then return {} end - if filter then urls = fun.totable(fun.filter(filter, urls)) end - - if #urls <= lim then - task:cache_set(cache_key, urls) + if params.filter then urls = fun.totable(fun.filter(params.filter, urls)) end + if #urls <= params.limit and #urls <= params.esld_limit then + params.task:cache_set(cache_key, urls) return urls end @@ -538,7 +573,9 @@ exports.extract_specific_urls = function(task, lim, need_emails, filter, prefix) eslds[esld] = {u} neslds = neslds + 1 else - table.insert(eslds[esld], u) + if #eslds[esld] < params.esld_limit then + table.insert(eslds[esld], u) + end end local parts = rspamd_str_split(esld, '.') @@ -566,35 +603,40 @@ exports.extract_specific_urls = function(task, lim, need_emails, filter, prefix) end end - lim = lim - #res - if lim <= 0 then lim = 1 end + local limit = params.limit + limit = limit - #res + if limit <= 0 then limit = 1 end - if neslds <= lim then + if neslds <= limit then -- We can get urls based on their eslds - while lim > 0 do + repeat + local item_found = false + for _,lurls in pairs(eslds) do if #lurls > 0 then table.insert(res, table.remove(lurls)) - lim = lim - 1 + limit = limit - 1 + item_found = true end end - end - task:cache_set(cache_key, urls) + until limit <= 0 or not item_found + + params.task:cache_set(cache_key, urls) return res end - if ntlds <= lim then - while lim > 0 do + if ntlds <= limit then + while limit > 0 do for _,lurls in pairs(tlds) do if #lurls > 0 then table.insert(res, table.remove(lurls)) - lim = lim - 1 + limit = limit - 1 end end end - task:cache_set(cache_key, urls) + params.task:cache_set(cache_key, urls) return res end @@ -611,14 +653,14 @@ exports.extract_specific_urls = function(task, lim, need_emails, filter, prefix) local tld2 = tlds[tlds_keys[ntlds - i]] table.insert(res, table.remove(tld1)) table.insert(res, table.remove(tld2)) - lim = lim - 2 + limit = limit - 2 - if lim <= 0 then + if limit <= 0 then break end end - task:cache_set(cache_key, urls) + params.task:cache_set(cache_key, urls) return res end |