diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-02-16 12:04:51 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-02-16 12:04:51 +0000 |
commit | bd9428b14979c7f06cc7871498e93f1893f1ec4f (patch) | |
tree | 738845dd0ae75ef06bb4fb3a30b695bf0516b084 /rules/html.lua | |
parent | 2fcc3bbac091a47444e0e33760b3cf3f96f030fa (diff) | |
download | rspamd-bd9428b14979c7f06cc7871498e93f1893f1ec4f.tar.gz rspamd-bd9428b14979c7f06cc7871498e93f1893f1ec4f.zip |
Fix HTML image rules to reduce false-positive (FP) rates
Suggested by: @moisseev
Issue: #525
Diffstat (limited to 'rules/html.lua')
-rw-r--r-- | rules/html.lua | 42 |
1 files changed, 31 insertions, 11 deletions
diff --git a/rules/html.lua b/rules/html.lua index c2635a8d0..0f26d1421 100644 --- a/rules/html.lua +++ b/rules/html.lua @@ -33,8 +33,14 @@ local function check_html_image(task, min, max) local images = hc:get_images() if images then for _,i in ipairs(images) do - if i['embedded'] then - return true + local tag = i['tag'] + if tag then + local parent = tag:get_parent() + if parent then + if parent:get_type() == 'a' then + return true + end + end end end end @@ -83,8 +89,16 @@ rspamd_config.R_EMPTY_IMAGE = { if images then -- if there are images for _,i in ipairs(images) do -- then iterate over images in the part - if i['embedded'] and i['height'] + i['width'] >= 400 then -- if we have a large image - return true -- add symbol + if i['height'] + i['width'] >= 400 then -- if we have a large image + local tag = i['tag'] + if tag then + local parent = tag:get_parent() + if parent then + if parent:get_type() == 'a' then + return true + end + end + end end end end @@ -112,13 +126,19 @@ rspamd_config.R_SUSPICIOUS_IMAGES = { if img then for _, i in ipairs(img) do - if i['embedded'] then - local dim = i['width'] + i['height'] - - -- do not trigger on small and large images - if dim > 100 and dim < 3000 then - -- We assume that a single picture 100x200 contains approx 3 words of text - pic_words = pic_words + dim / 100 + local dim = i['width'] + i['height'] + local tag = i['tag'] + + if tag then + local parent = tag:get_parent() + if parent then + if parent:get_type() == 'a' then + -- do not trigger on small and large images + if dim > 100 and dim < 3000 then + -- We assume that a single picture 100x200 contains approx 3 words of text + pic_words = pic_words + dim / 100 + end + end end end end |