aboutsummaryrefslogtreecommitdiffstats
path: root/lualib
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2019-09-09 12:53:49 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2019-09-09 12:53:49 +0100
commite7ebcf91c7a7649d758d1ef8f7b993500cc32a97 (patch)
tree73d6a1e2a2004e857025442c7f100e47a4ca9798 /lualib
parent23814946671f0385b38650aa855f701a0bfcd003 (diff)
downloadrspamd-e7ebcf91c7a7649d758d1ef8f7b993500cc32a97.tar.gz
rspamd-e7ebcf91c7a7649d758d1ef8f7b993500cc32a97.zip
[Project] Lua_magic: Simplify oasis files detection to a trie match
Diffstat (limited to 'lualib')
-rw-r--r--lualib/lua_magic/heuristics.lua65
1 files changed, 35 insertions, 30 deletions
diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index 4f2b583f0..60ff57224 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -41,8 +41,22 @@ local msoffice_clsids = {
msg = {[[46f0060000000000c000000000000046]], [[0b0d020000000000c000000000000046]]},
msi = {[[84100c0000000000c000000000000046]]},
}
+local zip_trie
+local zip_patterns = {
+ -- https://lists.oasis-open.org/archives/office/200505/msg00006.html
+ odt = {[[mimetypeapplication/vnd\.oasis\.opendocument.text]],
+ [[mimetypeapplication/vnd\.oasis.opendocument\.image]],
+ [[mimetypeapplication/vnd\.oasis\.opendocument\.graphic]]},
+ ods = {[[mimetypeapplication/vnd\.oasis\.opendocument\.spreadsheet]],
+ [[mimetypeapplication/vnd\.oasis\.opendocument.formula]],
+ [[mimetypeapplication/vnd\.oasis\.opendocument\.chart]]},
+ odp = {[[mimetypeapplication/vnd\.oasis\.opendocument\.presentation]]},
+}
+
+-- Used to match pattern index and extension
local msoffice_clsid_indexes = {}
local msoffice_patterns_indexes = {}
+local zip_patterns_indexes = {}
local exports = {}
@@ -84,6 +98,9 @@ local function compile_tries()
-- Clsids
msoffice_trie_clsid = compile_pats(msoffice_clsids, msoffice_clsid_indexes,
msoffice_clsid_transform)
+ -- Misc zip patterns at the initial fragment
+ zip_trie = compile_pats(zip_patterns, zip_patterns_indexes,
+ function(pat) return pat end)
end
end
@@ -219,36 +236,6 @@ local function detect_archive_flaw(part, arch, log_obj)
res.docx = res.docx + 30
elseif file:sub(1, 4) == 'ppt/' then
res.pptx = res.pptx + 30
- elseif file == 'META-INF/manifest.xml' then
- -- Apply ODT detection logic
- local content = part:get_content()
-
- if #content > 80 then
- -- https://lists.oasis-open.org/archives/office/200505/msg00006.html
- local start_span = content:span(30, 50)
-
- local mp = tostring(start_span:span(1, 8))
- if mp == 'mimetype' then
- local spec_type = tostring(start_span:span(9))
- if spec_type:find('vnd.oasis.opendocument.text') then
- res.odt = 40
- elseif spec_type:find('vnd.oasis.opendocument.spreadsheet') then
- res.ods = 40
- elseif spec_type:find('vnd.oasis.opendocument.formula') then
- res.ods = 40
- elseif spec_type:find('vnd.oasis.opendocument.chart') then
- res.ods = 40
- elseif spec_type:find('vnd.oasis.opendocument.presentation') then
- res.odp = 40
- elseif spec_type:find('vnd.oasis.opendocument.image') then
- -- Assume image as odt
- res.odt = 40
- elseif spec_type:find('vnd.oasis.opendocument.graphics') then
- -- Assume image as odt
- res.odt = 40
- end
- end
- end
end
end
@@ -257,6 +244,24 @@ local function detect_archive_flaw(part, arch, log_obj)
if weight >= 40 then
return ext,weight
end
+
+ -- Apply misc Zip detection logic
+ local content = part:get_content()
+
+ if #content > 128 then
+ local start_span = content:span(1, 128)
+
+ local matches = zip_trie:match(start_span)
+ if matches then
+ for n,_ in pairs(matches) do
+ if zip_patterns_indexes[n] then
+ lua_util.debugm(N, log_obj, "found zip pattern for %s",
+ zip_patterns_indexes[n])
+ return zip_patterns_indexes[n],40
+ end
+ end
+ end
+ end
end
return arch_type:lower(),40