123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438 |
- -- Licensed to the Apache Software Foundation (ASF) under one or more
- -- contributor license agreements. See the NOTICE file distributed with
- -- this work for additional information regarding copyright ownership.
- -- The ASF licenses this file to you under the Apache License, Version 2.0
- -- (the "License"); you may not use this file except in compliance with
- -- the License. You may obtain a copy of the License at:
- --
- -- http://www.apache.org/licenses/LICENSE-2.0
- --
- -- Unless required by applicable law or agreed to in writing, software
- -- distributed under the License is distributed on an "AS IS" BASIS,
- -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- -- See the License for the specific language governing permissions and
- -- limitations under the License.
-
-- Regexp rule table, read from the `config` global
local reconf = config.regexp

-- Rule for messages whose only content is an HTML part
reconf.MIME_HTML_ONLY = {
  description = 'Messages that have only HTML part',
  group = 'headers',
  re = 'has_only_html_part()',
  score = 0.2,
}
-
--- Tell whether a tag is nested inside an anchor element.
-- Climbs the ancestor chain through `get_parent()`; the tag itself is never
-- tested, only its ancestors.
-- @param tag object exposing `get_parent()`; every ancestor must also expose
--   `get_type()`
-- @return true if some ancestor reports the type 'a', false otherwise
local function has_anchor_parent(tag)
  local ancestor = tag:get_parent()

  while ancestor do
    if ancestor:get_type() == 'a' then
      return true
    end
    ancestor = ancestor:get_parent()
  end

  return false
end
-
--- Look for a linked image inside an HTML part of a given length range.
-- A part qualifies when it is HTML, has an HTML context and its length lies
-- in [min, max). Within such a part an image matches when its tag has an
-- anchor ancestor and either height + width >= 210 or it is not embedded.
-- @param task object providing `get_text_parts()`
-- @param min inclusive lower bound for the part length
-- @param max exclusive upper bound for the part length
-- @return true on the first matching image; nothing (nil) when none matches
local function check_html_image(task, min, max)
  for _, part in ipairs(task:get_text_parts()) do
    if part:is_html() then
      local html = part:get_html()
      local part_len = part:get_length()

      if html and part_len >= min and part_len < max then
        for _, img in ipairs(html:get_images() or {}) do
          local img_tag = img.tag

          -- Only images located somewhere below an <a> element are considered
          if img_tag and has_anchor_parent(img_tag) then
            -- Intent stated by the rule author: do not trigger on small and
            -- unknown size images.
            -- NOTE(review): as written, every non-embedded image passes this
            -- test regardless of its size -- confirm that is intended.
            if img.height + img.width >= 210 or not img.embedded then
              return true
            end
          end
        end
      end
    end
  end
end
-
-- Rule: HTML part with length in [0, 1024) that carries a linked image
rspamd_config.HTML_SHORT_LINK_IMG_1 = {
  description = 'Short html part (0..1K) with a link to an image',
  group = 'html',
  score = 2.0,
  callback = function(task)
    local lower_bound, upper_bound = 0, 1024
    return check_html_image(task, lower_bound, upper_bound)
  end,
}
-
-- Rule: HTML part with length in [1024, 1536) that carries a linked image
rspamd_config.HTML_SHORT_LINK_IMG_2 = {
  description = 'Short html part (1K..1.5K) with a link to an image',
  group = 'html',
  score = 1.0,
  callback = function(task)
    local lower_bound, upper_bound = 1024, 1536
    return check_html_image(task, lower_bound, upper_bound)
  end,
}
-
-- Rule: HTML part with length in [1536, 2048) that carries a linked image
rspamd_config.HTML_SHORT_LINK_IMG_3 = {
  description = 'Short html part (1.5K..2K) with a link to an image',
  group = 'html',
  score = 0.5,
  callback = function(task)
    local lower_bound, upper_bound = 1536, 2048
    return check_html_image(task, lower_bound, upper_bound)
  end,
}
-
-- Rule: an HTML part with a length below 50 that holds a large image which
-- is NOT wrapped in a link.
rspamd_config.R_EMPTY_IMAGE = {
  description = 'Message contains empty parts and image',
  group = 'html',
  score = 2.0,

  -- Returns true on the first matching image; nothing (nil) otherwise.
  callback = function(task)
    for _, part in ipairs(task:get_text_parts()) do
      if part:is_html() then
        local html = part:get_html()
        local text_len = part:get_length()

        -- Only nearly empty parts that actually have an HTML context
        if html and text_len < 50 then
          for _, img in ipairs(html:get_images() or {}) do
            -- A combined height and width of 400 or more counts as large
            if img.height + img.width >= 400 then
              local img_tag = img.tag
              if img_tag and not has_anchor_parent(img_tag) then
                return true
              end
            end
          end
        end
      end
    end
  end,
}
-
-- Rule: the amount of "text" estimated to be carried by linked images
-- outweighs the real word count of a part that has an HTML context.
rspamd_config.R_SUSPICIOUS_IMAGES = {
  -- Returns `true, weight` for the first part where image-derived words make
  -- up more than half of all words, otherwise `false`.
  callback = function(task)
    local tp = task:get_text_parts() -- get text parts in a message

    for _, p in ipairs(tp) do
      local h = p:get_html()

      if h then
        local l = p:get_words_count()
        local img = h:get_images()
        local pic_words = 0

        if img then
          for _, i in ipairs(img) do
            local dim = i['width'] + i['height']
            local tag = i['tag']

            -- Only linked images with 100 < width + height < 3000 contribute
            if tag and has_anchor_parent(tag) and dim > 100 and dim < 3000 then
              -- We assume that a single picture 100x200 contains approx 3 words of text
              pic_words = pic_words + dim / 100
            end
          end
        end

        -- Guard against dividing by zero when there are no words at all
        if l + pic_words > 0 then
          -- Share of image-derived "words" among all words of the part
          local rel = pic_words / (l + pic_words)

          if rel > 0.5 then
            -- Second value grows linearly from 0 (rel = 0.5) to 1 (rel = 1)
            return true, (rel - 0.5) * 2
          end
        end
      end
    end

    return false
  end,

  score = 5.0,
  group = 'html',
  -- Wording fixed: the rule is about images, not messages
  description = 'Message contains many suspicious images'
}
-
-- Callback symbol that inspects the styling of HTML blocks and reports up to
-- three findings through task:insert_result:
--   * R_WHITE_ON_WHITE     - text colour too close to the background colour
--   * MANY_INVISIBLE_PARTS - blocks flagged as not visible (or low contrast)
--   * ZERO_FONT            - blocks with a font size of zero
-- The value returned by register_symbol is kept in vis_check_id.
local vis_check_id = rspamd_config:register_symbol{
  name = 'HTML_VISIBLE_CHECKS',
  type = 'callback',
  group = 'html',
  callback = function(task)
    local tp = task:get_text_parts() -- get text parts in a message
    local ret = false -- becomes true once a low-contrast block is seen
    local transp_rate = 0 -- highest low-contrast share observed so far
    local invisible_blocks = 0
    local zero_size_blocks = 0
    local arg -- option string describing the block with the highest share

    local normal_len = 0 -- summed part lengths minus low-contrast content
    local transp_len = 0 -- weighted length of low-contrast content

    for _,p in ipairs(tp) do -- iterate over text parts array using `ipairs`
      normal_len = normal_len + p:get_length()
      if p:is_html() and p:get_html() then -- if the current part is html part
        local hc = p:get_html() -- we get HTML context

        hc:foreach_tag({'font', 'span', 'div', 'p', 'td'}, function(tag)
          local bl = tag:get_extra()
          if bl then
            if not bl['visible'] then
              invisible_blocks = invisible_blocks + 1
            end

            if bl['font_size'] and bl['font_size'] == 0 then
              zero_size_blocks = zero_size_blocks + 1
            end

            if bl['bgcolor'] and bl['color'] and bl['visible'] then

              local color = bl['color']
              local bgcolor = bl['bgcolor']
              -- Should use visual approach here some day
              -- Weighted RGB distance between the two colours (the formula
              -- resembles the "redmean" approximation); indexes 1..3 are
              -- treated as the red, green and blue components.
              local diff_r = math.abs(color[1] - bgcolor[1])
              local diff_g = math.abs(color[2] - bgcolor[2])
              local diff_b = math.abs(color[3] - bgcolor[3])
              local r_avg = (color[1] + bgcolor[1]) / 2.0
              -- Square
              diff_r = diff_r * diff_r
              diff_g = diff_g * diff_g
              diff_b = diff_b * diff_b

              local diff = math.sqrt(2*diff_r + 4*diff_g + 3 * diff_b +
                  (r_avg * (diff_r - diff_b) / 256.0))
              diff = diff / 256.0

              if diff < 0.1 then
                ret = true
                local content_len = #(tag:get_content() or {})
                invisible_blocks = invisible_blocks + 1 -- This block is invisible
                -- The closer the colours, the more of the content counts
                transp_len = transp_len + content_len * (0.1 - diff) * 10.0
                normal_len = normal_len - content_len
                local tr = transp_len / (normal_len + transp_len)
                if tr > transp_rate then
                  transp_rate = tr
                  -- Two zero-padded hex digits per component: the unpadded
                  -- form was ambiguous (0,0,0 printed as "#000", and 1,17,0
                  -- printed the same as 17,1,0).
                  arg = string.format(
                    '%s color #%02x%02x%02x bgcolor #%02x%02x%02x',
                    tostring(tag:get_type()),
                    color[1], color[2], color[3],
                    bgcolor[1], bgcolor[2], bgcolor[3])
                end
              end
            end
          end

          return false -- Continue search
        end)

      end
    end

    if ret then
      -- Recompute over the final totals of all parts
      transp_rate = transp_len / (normal_len + transp_len)

      if transp_rate > 0.1 then
        -- Cap at 0.5 so the inserted weight (rate * 2) never exceeds 1.0.
        -- The NaN test is defensive only: NaN already fails `> 0.1` above.
        if transp_rate > 0.5 or transp_rate ~= transp_rate then
          transp_rate = 0.5
        end

        task:insert_result('R_WHITE_ON_WHITE', (transp_rate * 2.0), arg)
      end
    end

    if invisible_blocks > 0 then
      -- The count is clamped to 10 both for the lookup and the reported option
      if invisible_blocks > 10 then
        invisible_blocks = 10
      end
      local rates = { -- From 1 to 10
        0.05,
        0.1,
        0.2,
        0.3,
        0.4,
        0.5,
        0.6,
        0.7,
        0.8,
        1.0,
      }
      task:insert_result('MANY_INVISIBLE_PARTS', rates[invisible_blocks],
          tostring(invisible_blocks))
    end

    if zero_size_blocks > 0 then
      if zero_size_blocks > 5 then
        if zero_size_blocks > 10 then
          -- Full score
          task:insert_result('ZERO_FONT', 1.0,
              tostring(zero_size_blocks))
        else
          -- 6..10 blocks share the weight (and reported count) of 5
          zero_size_blocks = 5
        end
      end

      if zero_size_blocks <= 5 then
        local rates = { -- From 1 to 5
          0.1,
          0.2,
          0.2,
          0.3,
          0.5,
        }
        task:insert_result('ZERO_FONT', rates[zero_size_blocks],
            tostring(zero_size_blocks))
      end
    end
  end,
}
-
-- Virtual symbol attached to the vis_check_id callback; that callback inserts
-- it with a weight of at most 1.0 when text and background colours are close.
rspamd_config:register_symbol({
  name = 'R_WHITE_ON_WHITE',
  type = 'virtual',
  parent = vis_check_id,
  group = 'html',
  score = 4.0,
  one_shot = true,
  description = 'Message contains low contrast text',
})
-
-- Virtual symbol attached to the vis_check_id callback. That callback inserts
-- the full weight only when more than 10 blocks have a zero font size;
-- smaller counts get a fraction of the score.
rspamd_config:register_symbol({
  name = 'ZERO_FONT',
  type = 'virtual',
  parent = vis_check_id,
  group = 'html',
  score = 1.0,
  one_shot = true,
  description = 'Zero sized font used',
})
-
-- Virtual symbol attached to the vis_check_id callback. That callback inserts
-- the full weight once 10 or more blocks are counted as hidden; smaller
-- counts get a fraction of the score.
rspamd_config:register_symbol({
  name = 'MANY_INVISIBLE_PARTS',
  type = 'virtual',
  parent = vis_check_id,
  group = 'html',
  score = 1.0,
  one_shot = true,
  description = 'Many parts are visually hidden',
})
-
-- Rule: some <link> tag in an HTML part refers to something whose string
-- form ends in ".css" (optionally followed by a query or fragment).
rspamd_config.EXT_CSS = {
  description = 'Message contains external CSS reference',
  group = 'html',
  score = 1.0,

  -- Returns true when at least one matching <link> tag was seen, else false.
  callback = function(task)
    local rspamd_regexp = require "rspamd_regexp"
    local css_re = rspamd_regexp.create_cached('/^.*\\.css(?:[?#].*)?$/i')
    local parts = task:get_text_parts()
    local seen_css = false

    -- Visitor for every <link> tag; remembers a match in `seen_css`
    local function inspect_link(tag)
      local extra = tag:get_extra()
      if extra then
        local text = tostring(extra)
        if text and css_re:match(text) then
          seen_css = true
        end
      end

      -- false keeps the traversal going; true presumably ends it
      return seen_css
    end

    for _, part in ipairs(parts) do
      if part:is_html() and part:get_html() then
        part:get_html():foreach_tag({'link'}, inspect_link)
      end
    end

    return seen_css
  end,
}
-
-- Rule: the visible text of an anchor is a URL whose scheme (http/https)
-- differs from the scheme of the anchor's target.
rspamd_config.HTTP_TO_HTTPS = {
  description = 'Anchor text contains different scheme to target URL',
  group = 'html',
  score = 2.0,

  -- Returns true on a scheme mismatch, false otherwise. Only the first HTML
  -- part is examined: the loop returns as soon as one has been processed.
  callback = function(task)
    local parts = task:get_text_parts()
    if not parts then
      return false
    end

    for _, part in ipairs(parts) do
      if part:is_html() then
        local html = part:get_html()
        if not html then
          return false
        end

        local mismatch = false

        html:foreach_tag('a', function(tag)
          -- Nothing more to do once a mismatch has been recorded
          if mismatch then
            return true
          end

          local text = tag:get_content()
          if not text then
            return false
          end
          text = tostring(text):lower()
          if not text:match('^http') then
            return false
          end

          local href = tag:get_extra()
          if not href then
            return false
          end
          href = tostring(href):lower()
          if not href:match('^http') then
            return false
          end

          -- Compare the scheme shown to the reader with the real one
          local text_plain, text_tls = text:match('^http:'), text:match('^https:')
          local href_plain, href_tls = href:match('^http:'), href:match('^https:')
          if (text_plain and href_tls) or (text_tls and href_plain) then
            mismatch = true
            return true
          end

          return false
        end)

        return mismatch
      end
    end

    return false
  end,
}
-
-- Rule: an anchor in the first HTML part points to a URL whose host is a
-- dotted-quad numeric address.
rspamd_config.HTTP_TO_IP = {
  -- Returns true when such an anchor is found, false otherwise. Only the
  -- first HTML part is examined: the loop returns once one was processed.
  callback = function(task)
    local tp = task:get_text_parts()
    if (not tp) then return false end

    -- http(s) scheme followed by four dot-separated groups of digits
    local ip_prefix = '^https?://%d+%.%d+%.%d+%.%d+'

    for _,p in ipairs(tp) do
      if p:is_html() then
        local hc = p:get_html()
        if (not hc) then return false end
        local found = false
        hc:foreach_tag('a', function (tag)
          -- Skip remaining tags if we already have a match
          if (found) then return true end
          local u = tag:get_extra()
          if (u) then
            u = tostring(u):lower()
            -- The digits must make up the whole host: they have to be
            -- followed by the end of the URL or by a port, path, query or
            -- fragment delimiter. Otherwise host names such as
            -- "1.2.3.4.example.com" would be reported as IP addresses.
            if u:match(ip_prefix .. '$') or u:match(ip_prefix .. '[/:?#]') then
              found = true
              return true -- match recorded, stop the traversal
            end
          end
          return false
        end)
        return found
      end
    end

    -- No HTML part at all: return an explicit false, like HTTP_TO_HTTPS does
    return false
  end,
  description = 'Anchor points to an IP address',
  score = 1.0,
  group = 'html'
}
|