summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-08-29 13:46:28 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-08-29 13:46:28 +0100
commit: 7eb40a10e77f7afb00a30eac0f4cea3b6f35c92b (patch)
tree: d280e4ce8a59bf3a2dc229108cefde3f015e8100
parent: fdb077d507c91d60dbcbdadd206cfe6495cef4c1 (diff)
download: rspamd-7eb40a10e77f7afb00a30eac0f4cea3b6f35c92b.tar.gz
download: rspamd-7eb40a10e77f7afb00a30eac0f4cea3b6f35c92b.zip
[Feature] Support images when extracting urls
-rw-r--r-- lualib/lua_util.lua 17
1 files changed, 15 insertions, 2 deletions
diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua
index 79c031b3c..b37ffd61d 100644
--- a/lualib/lua_util.lua
+++ b/lualib/lua_util.lua
@@ -640,8 +640,9 @@ exports.filter_specific_urls = function (urls, params)
if params.prefix then
cache_key = params.prefix
else
- cache_key = string.format('sp_urls_%d%s', params.limit,
- tostring(params.need_emails or false))
+ cache_key = string.format('sp_urls_%d%s%s', params.limit,
+ tostring(params.need_emails or false),
+ tostring(params.need_images or false))
end
local cached = params.task:cache_get(cache_key)
@@ -701,6 +702,16 @@ exports.filter_specific_urls = function (urls, params)
end
end
+ if flags.image then
+ if not params.need_images then
+ -- Ignore url
+ return
+ else
+ -- Penalise images in urls
+ priority = 0
+ end
+ end
+
local esld = u:get_tld()
local str_hash = tostring(u)
@@ -843,6 +854,7 @@ end
- - filter <callback> (default = nil)
- - prefix <string> cache prefix (default = nil)
- - ignore_redirected <bool> (default = false)
+- - need_images <bool> (default = false)
-- }
-- Apply heuristic in extracting of urls from task, this function
-- tries its best to extract specific number of urls from a task based on
@@ -854,6 +866,7 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte
limit = 9999,
esld_limit = 9999,
need_emails = false,
+ need_images = false,
filter = nil,
prefix = nil,
ignore_ip = false,