aboutsummaryrefslogtreecommitdiffstats
path: root/rules/html.lua
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-03-13 10:43:10 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-03-13 10:43:10 +0000
commit e2d44157b31eff46aeb1645b73e5b576b5afa897 (patch)
tree f20587302ab77f8aaf5762d7a2a06a1d19b91cef /rules/html.lua
parent 99c27fc7bec4217ebd2514adac9e2b9c7f129984 (diff)
download rspamd-e2d44157b31eff46aeb1645b73e5b576b5afa897.tar.gz
rspamd-e2d44157b31eff46aeb1645b73e5b576b5afa897.zip
[Rules] Fix HTTP_TO_HTTPS rule
Diffstat (limited to 'rules/html.lua')
-rw-r--r-- rules/html.lua 32
1 files changed, 22 insertions, 10 deletions
diff --git a/rules/html.lua b/rules/html.lua
index 1891cfefa..5430f49b7 100644
--- a/rules/html.lua
+++ b/rules/html.lua
@@ -15,6 +15,8 @@
local reconf = config['regexp']
+local rspamd_regexp = require "rspamd_regexp"
+
-- Messages that have only HTML part
reconf['MIME_HTML_ONLY'] = {
re = 'has_only_html_part()',
@@ -368,34 +370,44 @@ rspamd_config.EXT_CSS = {
rspamd_config.HTTP_TO_HTTPS = {
callback = function(task)
- local tp = task:get_text_parts()
- if (not tp) then return false end
+ local http_re = rspamd_regexp.create_cached('/^http[^s]/i')
+ local https_re = rspamd_regexp.create_cached('/^https:/i')
+ local found_opts
+ local tp = task:get_text_parts() or {}
+
for _,p in ipairs(tp) do
if p:is_html() then
local hc = p:get_html()
if (not hc) then return false end
+
local found = false
+
hc:foreach_tag('a', function (tag, length)
-- Skip this loop if we already have a match
if (found) then return true end
+
local c = tag:get_content()
if (c) then
- c = tostring(c):lower()
- if (not c:match('^http')) then return false end
+ if (not http_re:match(c)) then return false end
+
local u = tag:get_extra()
if (not u) then return false end
- u = tostring(u):lower()
- if (not u:match('^http')) then return false end
- if ((c:match('^http:') and u:match('^https:')) or
- (c:match('^https:') and u:match('^http:')))
- then
+ local url_proto = u:get_protocol()
+ if (not url_proto:match('^http')) then return false end
+ -- Capture matches for http in href to https in visible part only
+ if ((https_re:match(c) and url_proto == 'http')) then
found = true
+ found_opts = u:get_host()
return true
end
end
return false
end)
- if (found) then return true end
+
+ if (found) then
+ return true,1.0,found_opts
+ end
+
return false
end
end