From c3d9d449aadb4cd6853218b10de458c3227372bb Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 19 Aug 2019 14:35:36 +0100 Subject: [PATCH] [Minor] Allow to filter redirected --- lualib/lua_util.lua | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua index fe3d0d5e4..682a33bf5 100644 --- a/lualib/lua_util.lua +++ b/lualib/lua_util.lua @@ -671,9 +671,19 @@ exports.filter_specific_urls = function (urls, params) local res = {} - for _,u in ipairs(urls) do + local function process_single_url(u) local esld = u:get_tld() + if params.ignore_redirected and u:is_redirected() then + local redir = u:get_redirected() -- get the real url + local redir_tld = redir:get_tld() + + if redir_tld then + -- Ignore redirected as it should also be in the hash + return + end + end + if esld then if not eslds[esld] then eslds[esld] = {u} @@ -709,6 +719,10 @@ exports.filter_specific_urls = function (urls, params) end end + for _,u in ipairs(urls) do + process_single_url(u) + end + local limit = params.limit limit = limit - #res if limit <= 0 then limit = 1 end @@ -788,6 +802,7 @@ end - - need_emails (default = false) - - filter (default = nil) - - prefix cache prefix (default = nil) +- - ignore_redirected (default = false) -- } -- Apply heuristic in extracting of urls from task, this function -- tries its best to extract specific number of urls from a task based on @@ -800,7 +815,8 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte esld_limit = 9999, need_emails = false, filter = nil, - prefix = nil + prefix = nil, + ignore_redirected = false, } local params -- 2.39.5