about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- lualib/lua_util.lua 33
1 files changed, 30 insertions, 3 deletions
diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua
index 89a4016b2..5b0950419 100644
--- a/lualib/lua_util.lua
+++ b/lualib/lua_util.lua
@@ -672,9 +672,10 @@ exports.filter_specific_urls = function (urls, params)
if params.prefix then
cache_key = params.prefix
else
- cache_key = string.format('sp_urls_%d%s%s', params.limit,
+ cache_key = string.format('sp_urls_%d%s%s%s', params.limit,
tostring(params.need_emails or false),
- tostring(params.need_images or false))
+ tostring(params.need_images or false),
+ tostring(params.need_content or false))
end
local cached = params.task:cache_get(cache_key)
@@ -879,6 +880,7 @@ end
- - prefix <string> cache prefix (default = nil)
- - ignore_redirected <bool> (default = false)
- - need_images <bool> (default = false)
+- - need_content <bool> (default = false)
-- }
-- Apply heuristic in extracting of urls from task, this function
-- tries its best to extract specific number of urls from a task based on
@@ -891,6 +893,7 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte
esld_limit = 9999,
need_emails = false,
need_images = false,
+ need_content = false,
filter = nil,
prefix = nil,
ignore_ip = false,
@@ -914,8 +917,32 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte
for k,v in pairs(default_params) do
if type(params[k]) == 'nil' and v ~= nil then params[k] = v end
end
+ local url_params = {
+ emails = params.need_emails,
+ images = params.need_images,
+ content = params.need_content,
+ }
+
+ -- Shortcut for cached stuff
+ if params.task and not params.no_cache then
+ local cache_key
+ if params.prefix then
+ cache_key = params.prefix
+ else
+ cache_key = string.format('sp_urls_%d%s%s%s', params.limit,
+ tostring(params.need_emails or false),
+ tostring(params.need_images or false),
+ tostring(params.need_content or false))
+ end
+ local cached = params.task:cache_get(cache_key)
+
+ if cached then
+ return cached
+ end
+ end
- local urls = params.task:get_urls(params.need_emails, params.need_images)
+ -- No cache version
+ local urls = params.task:get_urls(url_params)
return exports.filter_specific_urls(urls, params)
end