diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-03-13 10:43:10 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-03-13 10:43:10 +0000 |
commit | e2d44157b31eff46aeb1645b73e5b576b5afa897 (patch) | |
tree | f20587302ab77f8aaf5762d7a2a06a1d19b91cef /rules/html.lua | |
parent | 99c27fc7bec4217ebd2514adac9e2b9c7f129984 (diff) | |
download | rspamd-e2d44157b31eff46aeb1645b73e5b576b5afa897.tar.gz rspamd-e2d44157b31eff46aeb1645b73e5b576b5afa897.zip |
[Rules] Fix HTTP_TO_HTTPS rule
Diffstat (limited to 'rules/html.lua')
-rw-r--r-- | rules/html.lua | 32 |
1 file changed, 22 insertions, 10 deletions
diff --git a/rules/html.lua b/rules/html.lua index 1891cfefa..5430f49b7 100644 --- a/rules/html.lua +++ b/rules/html.lua @@ -15,6 +15,8 @@ local reconf = config['regexp'] +local rspamd_regexp = require "rspamd_regexp" + -- Messages that have only HTML part reconf['MIME_HTML_ONLY'] = { re = 'has_only_html_part()', @@ -368,34 +370,44 @@ rspamd_config.EXT_CSS = { rspamd_config.HTTP_TO_HTTPS = { callback = function(task) - local tp = task:get_text_parts() - if (not tp) then return false end + local http_re = rspamd_regexp.create_cached('/^http[^s]/i') + local https_re = rspamd_regexp.create_cached('/^https:/i') + local found_opts + local tp = task:get_text_parts() or {} + for _,p in ipairs(tp) do if p:is_html() then local hc = p:get_html() if (not hc) then return false end + local found = false + hc:foreach_tag('a', function (tag, length) -- Skip this loop if we already have a match if (found) then return true end + local c = tag:get_content() if (c) then - c = tostring(c):lower() - if (not c:match('^http')) then return false end + if (not http_re:match(c)) then return false end + local u = tag:get_extra() if (not u) then return false end - u = tostring(u):lower() - if (not u:match('^http')) then return false end - if ((c:match('^http:') and u:match('^https:')) or - (c:match('^https:') and u:match('^http:'))) - then + local url_proto = u:get_protocol() + if (not url_proto:match('^http')) then return false end + -- Capture matches for http in href to https in visible part only + if ((https_re:match(c) and url_proto == 'http')) then found = true + found_opts = u:get_host() return true end end return false end) - if (found) then return true end + + if (found) then + return true,1.0,found_opts + end + return false end end |