diff options
Diffstat (limited to 'lualib/lua_util.lua')
-rw-r--r-- | lualib/lua_util.lua | 20 |
1 files changed, 18 insertions, 2 deletions
diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua index fe3d0d5e4..682a33bf5 100644 --- a/lualib/lua_util.lua +++ b/lualib/lua_util.lua @@ -671,9 +671,19 @@ exports.filter_specific_urls = function (urls, params) local res = {} - for _,u in ipairs(urls) do + local function process_single_url(u) local esld = u:get_tld() + if params.ignore_redirected and u:is_redirected() then + local redir = u:get_redirected() -- get the real url + local redir_tld = redir:get_tld() + + if redir_tld then + -- Ignore redirected as it should also be in the hash + return + end + end + if esld then if not eslds[esld] then eslds[esld] = {u} @@ -709,6 +719,10 @@ exports.filter_specific_urls = function (urls, params) end end + for _,u in ipairs(urls) do + process_single_url(u) + end + local limit = params.limit limit = limit - #res if limit <= 0 then limit = 1 end @@ -788,6 +802,7 @@ end - - need_emails <bool> (default = false) - - filter <callback> (default = nil) - - prefix <string> cache prefix (default = nil) +- - ignore_redirected <bool> (default = false) -- } -- Apply heuristic in extracting of urls from task, this function -- tries its best to extract specific number of urls from a task based on @@ -800,7 +815,8 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte esld_limit = 9999, need_emails = false, filter = nil, - prefix = nil + prefix = nil, + ignore_redirected = false, } local params |