Fix HTML image rules to reduce false-positive (FP) rates

Suggested by: @moisseev; Issue: #525
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-02-16 12:04:51 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-02-16 12:04:51 +0000
commit: bd9428b14979c7f06cc7871498e93f1893f1ec4f (patch)
tree: 738845dd0ae75ef06bb4fb3a30b695bf0516b084 /rules/html.lua
parent: 2fcc3bbac091a47444e0e33760b3cf3f96f030fa (diff)
download: rspamd-bd9428b14979c7f06cc7871498e93f1893f1ec4f.tar.gz
rspamd-bd9428b14979c7f06cc7871498e93f1893f1ec4f.zip
1 files changed, 31 insertions, 11 deletions
diff --git a/rules/html.lua b/rules/html.lua
index c2635a8d0..0f26d1421 100644
--- a/rules/html.lua
+++ b/rules/html.lua
@@ -33,8 +33,14 @@ local function check_html_image(task, min, max)
         local images = hc:get_images()
         if images then
           for _,i in ipairs(images) do
-            if i['embedded'] then
-              return true
+            local tag = i['tag']
+            if tag then
+              local parent = tag:get_parent()
+              if parent then
+                if parent:get_type() == 'a' then
+                  return true
+                end
+              end
             end
           end
         end
@@ -83,8 +89,16 @@ rspamd_config.R_EMPTY_IMAGE = {
 
           if images then -- if there are images
             for _,i in ipairs(images) do -- then iterate over images in the part
-              if i['embedded'] and i['height'] + i['width'] >= 400 then -- if we have a large image
-                return true -- add symbol
+              if i['height'] + i['width'] >= 400 then -- if we have a large image
+                local tag = i['tag']
+                if tag then
+                  local parent = tag:get_parent()
+                  if parent then
+                    if parent:get_type() == 'a' then
+                      return true
+                    end
+                  end
+                end
               end
             end
           end
@@ -112,13 +126,19 @@ rspamd_config.R_SUSPICIOUS_IMAGES = {
 
         if img then
           for _, i in ipairs(img) do
-            if i['embedded'] then
-              local dim = i['width'] + i['height']
-
-              -- do not trigger on small and large images
-              if dim > 100 and dim < 3000 then
-                -- We assume that a single picture 100x200 contains approx 3 words of text
-                pic_words = pic_words + dim / 100
+            local dim = i['width'] + i['height']
+            local tag = i['tag']
+
+            if tag then
+              local parent = tag:get_parent()
+              if parent then
+                if parent:get_type() == 'a' then
+                  -- do not trigger on small and large images
+                  if dim > 100 and dim < 3000 then
+                    -- We assume that a single picture 100x200 contains approx 3 words of text
+                    pic_words = pic_words + dim / 100
+                  end
+                end
               end
             end
           end
author	Vsevolod Stakhov <vsevolod@highsecure.ru>	2016-02-16 12:04:51 +0000
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>	2016-02-16 12:04:51 +0000
commit	bd9428b14979c7f06cc7871498e93f1893f1ec4f (patch)
tree	738845dd0ae75ef06bb4fb3a30b695bf0516b084 /rules/html.lua
parent	2fcc3bbac091a47444e0e33760b3cf3f96f030fa (diff)
download	rspamd-bd9428b14979c7f06cc7871498e93f1893f1ec4f.tar.gz rspamd-bd9428b14979c7f06cc7871498e93f1893f1ec4f.zip