@@ -52,6 +52,7 @@ local function compile_msoffice_trie(log_obj) | |||
local strs = {} | |||
for ext,pats in pairs(msoffice_patterns) do | |||
for _,pat in ipairs(pats) do | |||
-- These are utf16 strings in fact... | |||
strs[#strs + 1] = '^' .. | |||
table.concat( | |||
fun.totable( | |||
@@ -66,6 +67,7 @@ local function compile_msoffice_trie(log_obj) | |||
strs = {} | |||
for ext,pats in pairs(msoffice_clsids) do | |||
for _,pat in ipairs(pats) do | |||
-- Convert hex to re | |||
local hex_table = {} | |||
for i=1,#pat,2 do | |||
local subc = pat:sub(i, i + 1) | |||
@@ -163,6 +165,30 @@ local function detect_ole_format(input, log_obj) | |||
until directory_offset >= inplen | |||
end | |||
exports.ole_format_heuristic = detect_ole_format | |||
--- Guesses a file extension for a MIME part from what Rspamd already knows
-- about it (text / image / archive), without looking at the raw content.
-- @param part MIME part object providing is_text/is_image/is_archive and
--   the matching get_text/get_image/get_archive accessors
-- @return extension string and a weight (always 60 here) when the part is
--   text, an image or an archive; a single nil otherwise
exports.mime_part_heuristic = function(part)
  -- Every branch reports the same confidence
  local weight = 60

  if part:is_text() then
    -- Text parts are either HTML or treated as plain text
    local ext = 'txt'
    if part:get_text():is_html() then
      ext = 'html'
    end
    return ext, weight
  elseif part:is_image() then
    -- The extension is the lowercased image type name
    local kind = part:get_image():get_type():lower()
    return kind, weight
  elseif part:is_archive() then
    -- TODO: add files heuristics
    local kind = part:get_archive():get_type():lower()
    return kind, weight
  end

  return nil
end
return exports |
@@ -21,6 +21,7 @@ limitations under the License. | |||
local patterns = require "lua_magic/patterns" | |||
local types = require "lua_magic/types" | |||
local heuristics = require "lua_magic/heuristics" | |||
local fun = require "fun" | |||
local lua_util = require "lua_util" | |||
@@ -317,6 +318,16 @@ exports.detect = function(input, log_obj) | |||
return nil | |||
end | |||
--- Detects the type of a MIME part.
-- The structural heuristic (text / image / archive flags) is consulted first;
-- its answer is accepted when the weight is above 20 and the extension is
-- present in the `types` table. Otherwise the part content is passed to
-- `exports.detect`.
-- @param part MIME part object (must provide get_content())
-- @param log_obj logger object forwarded to exports.detect
-- @return extension and its `types` entry; when nothing matches, whatever
--   exports.detect returns
exports.detect_mime_part = function(part, log_obj)
  local ext, weight = heuristics.mime_part_heuristic(part)
  local confident = ext and weight and weight > 20

  if confident and types[ext] then
    return ext, types[ext]
  end

  if not confident then
    return exports.detect(part:get_content(), log_obj)
  end

  -- The heuristic is confident, but its extension has no entry in `types`
  -- (the heuristic returns arbitrary lowercased type names). Returning
  -- `ext, nil` straight away would skip content detection, so try that first.
  -- NOTE(review): assumes exports.detect returns `ext, type` on success and
  -- nil on failure - confirm against its definition.
  local det_ext, det_type = exports.detect(part:get_content(), log_obj)
  if det_ext then
    return det_ext, det_type
  end

  -- Nothing better found: keep the previous behaviour (extension, no type)
  return ext, nil
end
-- This parameter specifies how many bytes are checked in the input | |||
-- Rspamd checks 2 chunks at start and 1 chunk at the end | |||
exports.chunk_size = 32768 |
@@ -168,6 +168,50 @@ local types = { | |||
ct = 'application/x-uuencoded', | |||
type = 'binary', | |||
}, | |||
-- Types that are detected by Rspamd itself | |||
-- Archives | |||
zip = { | |||
ct = 'application/zip', | |||
type = 'archive', | |||
}, | |||
rar = { | |||
ct = 'application/x-rar', | |||
type = 'archive', | |||
}, | |||
['7z'] = {
  -- Full media type: the top-level `application/` part was missing
  ct = 'application/x-7z-compressed',
  type = 'archive',
},
gz = { | |||
ct = 'application/gzip', | |||
type = 'archive', | |||
}, | |||
-- Images | |||
png = { | |||
ct = 'image/png', | |||
type = 'image', | |||
}, | |||
gif = { | |||
ct = 'image/gif', | |||
type = 'image', | |||
}, | |||
jpg = { | |||
ct = 'image/jpeg', | |||
type = 'image', | |||
}, | |||
bmp = { | |||
type = 'image', | |||
ct = 'image/bmp', | |||
}, | |||
-- Text | |||
txt = { | |||
type = 'text', | |||
ct = 'text/plain', | |||
}, | |||
html = { | |||
type = 'text', | |||
ct = 'text/html', | |||
}, | |||
} | |||
return types |