From ef16ff4ddd6f12dc2d6bc0bd5cc2dae5b219d592 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 16 Aug 2021 11:15:11 +0100 Subject: [PATCH] [Minor] Lua_magic: Fix detection for the parts with file names --- lualib/lua_magic/heuristics.lua | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua index 376e19cd4..0a0cb7471 100644 --- a/lualib/lua_magic/heuristics.lua +++ b/lualib/lua_magic/heuristics.lua @@ -429,6 +429,8 @@ exports.text_part_heuristic = function(part, log_obj, _) local start_span = content:span(1, span_len) local matches = txt_trie:match(start_span) local res = {} + local fname = part:get_filename() + if matches then -- Require at least 2 occurrences of those patterns for n,positions in pairs(matches) do @@ -447,8 +449,12 @@ exports.text_part_heuristic = function(part, log_obj, _) local ext, weight = process_top_detected(res) - if weight and weight >= 40 then - return ext, weight + if weight then + if weight >= 40 then + return ext, weight + elseif fname and weight >= 20 then + return ext, weight + end end end @@ -466,8 +472,6 @@ exports.text_part_heuristic = function(part, log_obj, _) and file:sub(-ext_len - 1, -ext_len - 1) == '.' end - - local fname = part:get_filename() if fname and (has_extension(fname, 'htm') or has_extension(fname, 'html')) then return 'html',21 end -- 2.39.5