diff options
-rw-r--r-- | lualib/lua_magic/heuristics.lua | 38 | ||||
-rw-r--r-- | lualib/lua_magic/types.lua | 15 |
2 files changed, 49 insertions, 4 deletions
diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index 6a407f5e9..167edd0c9 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -165,9 +165,42 @@ local function detect_ole_format(input, log_obj)
   until directory_offset >= inplen
 end
 
-
 exports.ole_format_heuristic = detect_ole_format
 
+local function detect_archive_flaw(part, arch) -- returns ext,confidence; `part` is currently unused
+  local arch_type = arch:get_type()
+  local res = {
+    docx = 0,
+    xlsx = 0,
+    pptx = 0,
+    jar = 0,
+  } -- ext + confidence pairs
+
+  -- General msoffice patterns: raise every OOXML candidate by the same increment
+  local function add_msoffice_confidence(incr)
+    res.docx = res.docx + incr
+    res.xlsx = res.xlsx + incr
+    res.pptx = res.pptx + incr
+  end
+
+  if arch_type == 'zip' then
+    -- Find specific files/folders in zip file (exact name match only)
+    local files = arch:get_files() or {}
+    for _,file in ipairs(files) do
+      if file == '[Content_Types].xml' then
+        add_msoffice_confidence(10)
+      elseif file == 'xl/' then
+        res.xlsx = res.xlsx + 30
+      elseif file == 'word/' then
+        res.docx = res.docx + 30 -- each marker folder feeds its own format score
+      elseif file == 'ppt/' then
+        res.pptx = res.pptx + 30
+      end
+    end
+  end
+
+  return arch_type:lower(),40 -- NOTE(review): `res` is not consulted here yet; only the archive type is reported
+end
 exports.mime_part_heuristic = function(part)
   if part:is_text() then
     if part:get_text():is_html() then
@@ -184,8 +217,7 @@ exports.mime_part_heuristic = function(part)
 
   if part:is_archive() then
     local arch = part:get_archive()
-    -- TODO: add files heuristics
-    return arch:get_type():lower(),60
+    return detect_archive_flaw(part, arch)
   end
 
   return nil
diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index c8850cd18..c5de552c8 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -157,7 +157,20 @@ local types = {
   },
   msg = {
     ct = 'application/vnd.ms-outlook',
-    type = 'executable'
+    type = 'msoffice'
+  },
+  -- newer office (2007+)
+  docx = {
+    ct = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+    type = 'msoffice'
+  },
+  xlsx = {
+    ct = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+    type = 'msoffice'
+  },
+  pptx = {
+    ct = 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+    type = 'msoffice'
   },
   -- other
   pgp = {