From 7c742b4c0f833d70cd5ee68068ac50e39ada3eb6 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 5 Aug 2016 12:57:45 +0100 Subject: [PATCH] [Fix] Fix HFILTER_URL module --- src/plugins/lua/hfilter.lua | 64 ++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/src/plugins/lua/hfilter.lua b/src/plugins/lua/hfilter.lua index b32bac890..88df8f545 100644 --- a/src/plugins/lua/hfilter.lua +++ b/src/plugins/lua/hfilter.lua @@ -242,35 +242,49 @@ local function hfilter(task) if config['url_enabled'] then local parts = task:get_text_parts() if parts then - --One text part-- - local total_parts_len = 0 - local text_parts_count = 0 - local lines_max = 0 - local selected_text_part = nil + local plain_text_part = nil + local html_text_part = nil for _,p in ipairs(parts) do - total_parts_len = total_parts_len + p:get_length() - - if p:get_lines_count() > lines_max then - lines_max = p:get_lines_count() - end - - if not p:is_html() then - text_parts_count = text_parts_count + 1 - selected_text_part = p + if p:is_html() then + html_text_part = p + else + plain_text_part = p end end - if total_parts_len > 0 then - local urls = task:get_urls() - if urls then - local total_url_len = 0 - for _,url in ipairs(urls) do - total_url_len = total_url_len + url:get_length() + + if html_text_part then + local hc = html_text_part:get_html() + if hc then + local url_len = 0 + hc:foreach_tag('a', function(tag, len) + url_len = url_len + len + return false + end) + + local plen = html_text_part:get_length() + + if url_len > 0 and plen > 0 then + local rel = url_len / plen + if rel > 0.8 then + task:insert_result('HFILTER_URL_ONLY', (rel - 0.8) * 5.0) + local lines = html_text_part:get_lines() + if lines > 0 and lines < 2 then + task:insert_result('HFILTER_URL_ONELINE', 1.00) + end + end end - if total_url_len > 0 then - if total_url_len + 7 > total_parts_len then - task:insert_result('HFILTER_URL_ONLY', 1.00) - elseif lines_max > 0 and lines_max < 2 then - task:insert_result('HFILTER_URL_ONELINE', 1.00) + elseif plain_text_part then + local url_len = plain_text_part:get_urls_length() + local plen = plain_text_part:get_length() + + if plen > 0 and url_len > 0 then + local rel = url_len / plen + if rel > 0.8 then + task:insert_result('HFILTER_URL_ONLY', (rel - 0.8) * 5.0) + local lines = plain_text_part:get_lines() + if lines > 0 and lines < 2 then + task:insert_result('HFILTER_URL_ONELINE', 1.00) + end end end end -- 2.39.5