Browse Source

[Minor] Allow filtering of redirected URLs

tags/2.0
Vsevolod Stakhov 4 years ago
parent
commit
c3d9d449aa
1 changed file with 18 additions and 2 deletions
  1. 18
    2
      lualib/lua_util.lua

+ 18
- 2
lualib/lua_util.lua View File

@@ -671,9 +671,19 @@ exports.filter_specific_urls = function (urls, params)

local res = {}

for _,u in ipairs(urls) do
local function process_single_url(u)
local esld = u:get_tld()

if params.ignore_redirected and u:is_redirected() then
local redir = u:get_redirected() -- get the real url
local redir_tld = redir:get_tld()

if redir_tld then
-- Skip this URL: the real (redirect target) URL should also be in the hash
return
end
end

if esld then
if not eslds[esld] then
eslds[esld] = {u}
@@ -709,6 +719,10 @@ exports.filter_specific_urls = function (urls, params)
end
end

for _,u in ipairs(urls) do
process_single_url(u)
end

local limit = params.limit
limit = limit - #res
if limit <= 0 then limit = 1 end
@@ -788,6 +802,7 @@ end
- - need_emails <bool> (default = false)
- - filter <callback> (default = nil)
- - prefix <string> cache prefix (default = nil)
- - ignore_redirected <bool> (default = false)
-- }
-- Apply heuristic in extracting of urls from task, this function
-- tries its best to extract specific number of urls from a task based on
@@ -800,7 +815,8 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte
esld_limit = 9999,
need_emails = false,
filter = nil,
prefix = nil
prefix = nil,
ignore_redirected = false,
}

local params

Loading…
Cancel
Save