]> source.dussan.org Git - rspamd.git/commitdiff
[Fix] Fix HFILTER_URL module
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 5 Aug 2016 11:57:45 +0000 (12:57 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 5 Aug 2016 12:05:45 +0000 (13:05 +0100)
src/plugins/lua/hfilter.lua

index b32bac890c1b7c9b99125d1a92ad7721b623f5f4..88df8f5452deb4a09a031301851708fa1ad5d813 100644 (file)
@@ -242,35 +242,49 @@ local function hfilter(task)
   if config['url_enabled'] then
     local parts = task:get_text_parts()
     if parts then
-      --One text part--
-      local total_parts_len = 0
-      local text_parts_count = 0
-      local lines_max = 0
-      local selected_text_part = nil
+      local plain_text_part = nil
+      local html_text_part = nil
       for _,p in ipairs(parts) do
-        total_parts_len = total_parts_len + p:get_length()
-
-        if p:get_lines_count() > lines_max then
-          lines_max = p:get_lines_count()
-        end
-
-        if not p:is_html() then
-          text_parts_count = text_parts_count + 1
-          selected_text_part = p
+        if p:is_html() then
+          html_text_part = p
+        else
+          plain_text_part = p
         end
       end
-      if total_parts_len > 0 then
-        local urls = task:get_urls()
-        if urls then
-          local total_url_len = 0
-          for _,url in ipairs(urls) do
-            total_url_len = total_url_len + url:get_length()
+
+      if html_text_part then
+        local hc = html_text_part:get_html()
+        if hc then
+          local url_len = 0
+          hc:foreach_tag('a', function(tag, len)
+            url_len = url_len + len
+            return false
+          end)
+
+          local plen = html_text_part:get_length()
+
+          if url_len > 0 and plen > 0 then
+            local rel = url_len / plen
+            if rel > 0.8 then
+              task:insert_result('HFILTER_URL_ONLY', (rel - 0.8) * 5.0)
+              local lines =  html_text_part:get_lines()
+              if lines > 0 and lines < 2 then
+                task:insert_result('HFILTER_URL_ONELINE', 1.00)
+              end
+            end
           end
-          if total_url_len > 0 then
-            if total_url_len + 7 > total_parts_len then
-              task:insert_result('HFILTER_URL_ONLY', 1.00)
-            elseif lines_max > 0 and lines_max < 2 then
-              task:insert_result('HFILTER_URL_ONELINE', 1.00)
+        elseif plain_text_part then
+          local url_len = plain_text_part:get_urls_length()
+          local plen = plain_text_part:get_length()
+
+          if plen > 0 and url_len > 0 then
+            local rel = url_len / plen
+            if rel > 0.8 then
+              task:insert_result('HFILTER_URL_ONLY', (rel - 0.8) * 5.0)
+              local lines =  plain_text_part:get_lines()
+              if lines > 0 and lines < 2 then
+                task:insert_result('HFILTER_URL_ONELINE', 1.00)
+              end
             end
           end
         end