aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- lualib/lua_magic/heuristics.lua | 38
-rw-r--r-- lualib/lua_magic/types.lua | 15
2 files changed, 49 insertions, 4 deletions
diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index 6a407f5e9..167edd0c9 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -165,9 +165,42 @@ local function detect_ole_format(input, log_obj)
until directory_offset >= inplen
end
-
exports.ole_format_heuristic = detect_ole_format
--- Refine the detected type of an archive using the names of its members.
-- Office Open XML documents are ordinary zip archives, so they are told apart
-- by well-known member names: `[Content_Types].xml` hints at any of
-- docx/xlsx/pptx, while a top-level `word/`, `xl/` or `ppt/` folder points at
-- one specific format.
-- @param part mime part that owns the archive (currently unused)
-- @param arch archive object; must provide `get_type()` and `get_files()`
-- @return extension string: `docx`, `xlsx` or `pptx` when enough evidence was
--   found, otherwise the lower-cased archive type
-- @return confidence number for the returned extension
local function detect_archive_flaw(part, arch)
  local arch_type = arch:get_type()
  -- Confidence reported for a plain archive; a specific format must reach at
  -- least this score to replace the generic archive type.
  local default_confidence = 40
  -- Fixed evaluation order so that ties are resolved deterministically
  -- (pairs() order over `res` would be unspecified).
  local candidates = { 'docx', 'xlsx', 'pptx', 'jar' }
  local res = {
    docx = 0,
    xlsx = 0,
    pptx = 0,
    jar = 0, -- NOTE(review): nothing scores `jar` yet, so it can never win
  } -- ext + confidence pairs
  -- Top-level folder name -> format it is specific to
  local folder_hints = {
    ['xl/'] = 'xlsx',
    ['word/'] = 'docx',
    ['ppt/'] = 'pptx',
  }

  -- General msoffice patterns
  local function add_msoffice_confidence(incr)
    res.docx = res.docx + incr
    res.xlsx = res.xlsx + incr
    res.pptx = res.pptx + incr
  end

  if arch_type == 'zip' then
    -- Find specific files/folders in zip file
    local files = arch:get_files() or {}
    local seen = {}
    for _, file in ipairs(files) do
      if file == '[Content_Types].xml' then
        add_msoffice_confidence(10)
      elseif type(file) == 'string' then
        -- Match on the first path component: zip writers frequently omit
        -- explicit directory entries, so `word/document.xml` must count as
        -- evidence just like a bare `word/` entry does.
        local top = file:match('^[^/]+/')
        local ext = top and folder_hints[top]
        if ext and not seen[ext] then
          -- Score each marker folder once, however many members it holds
          seen[ext] = true
          res[ext] = res[ext] + 30
        end
      end
    end

    local best_ext, best_conf = nil, 0
    for _, ext in ipairs(candidates) do
      if res[ext] > best_conf then
        best_ext, best_conf = ext, res[ext]
      end
    end

    if best_ext and best_conf >= default_confidence then
      return best_ext, best_conf
    end
  end

  return arch_type:lower(), default_confidence
end
exports.mime_part_heuristic = function(part)
if part:is_text() then
if part:get_text():is_html() then
@@ -184,8 +217,7 @@ exports.mime_part_heuristic = function(part)
if part:is_archive() then
local arch = part:get_archive()
- -- TODO: add files heuristics
- return arch:get_type():lower(),60
+ return detect_archive_flaw(part, arch)
end
return nil
diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index c8850cd18..c5de552c8 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -157,7 +157,20 @@ local types = {
},
msg = {
ct = 'application/vnd.ms-outlook',
- type = 'executable'
+ type = 'msoffice'
+ },
+ -- newer office (2007+)
+ docx = {
+ ct = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+ type = 'msoffice'
+ },
+ xlsx = {
+ ct = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+ type = 'msoffice'
+ },
+ pptx = {
+ ct = 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+ type = 'msoffice'
},
-- other
pgp = {