From f3423dd01487a6ac22512cf874368d76d47139f8 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 5 May 2020 15:48:06 +0100 Subject: [PATCH] [Minor] Allow to get content urls in extract_specific routine --- lualib/lua_util.lua | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua index 89a4016b2..5b0950419 100644 --- a/lualib/lua_util.lua +++ b/lualib/lua_util.lua @@ -672,9 +672,10 @@ exports.filter_specific_urls = function (urls, params) if params.prefix then cache_key = params.prefix else - cache_key = string.format('sp_urls_%d%s%s', params.limit, + cache_key = string.format('sp_urls_%d%s%s%s', params.limit, tostring(params.need_emails or false), - tostring(params.need_images or false)) + tostring(params.need_images or false), + tostring(params.need_content or false)) end local cached = params.task:cache_get(cache_key) @@ -879,6 +880,7 @@ end - - prefix cache prefix (default = nil) - - ignore_redirected (default = false) - - need_images (default = false) +- - need_content (default = false) -- } -- Apply heuristic in extracting of urls from task, this function -- tries its best to extract specific number of urls from a task based on @@ -891,6 +893,7 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte esld_limit = 9999, need_emails = false, need_images = false, + need_content = false, filter = nil, prefix = nil, ignore_ip = false, @@ -914,8 +917,32 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte for k,v in pairs(default_params) do if type(params[k]) == 'nil' and v ~= nil then params[k] = v end end + local url_params = { + emails = params.need_emails, + images = params.need_images, + content = params.need_content, + } + + -- Shortcut for cached stuff + if params.task and not params.no_cache then + local cache_key + if params.prefix then + cache_key = params.prefix + else + cache_key = string.format('sp_urls_%d%s%s%s', params.limit, + tostring(params.need_emails or false), + tostring(params.need_images or false), + tostring(params.need_content or false)) + end + local cached = params.task:cache_get(cache_key) + + if cached then + return cached + end + end - local urls = params.task:get_urls(params.need_emails, params.need_images) + -- No cache version + local urls = params.task:get_urls(url_params) return exports.filter_specific_urls(urls, params) end -- 2.39.5