Browse Source

[Project] Lua_magic: Add mime parts detection function

tags/2.0
Vsevolod Stakhov 4 years ago
parent
commit
7cf7a889a6
3 changed files with 81 additions and 0 deletions
  1. 26
    0
      lualib/lua_magic/heuristics.lua
  2. 11
    0
      lualib/lua_magic/init.lua
  3. 44
    0
      lualib/lua_magic/types.lua

+ 26
- 0
lualib/lua_magic/heuristics.lua View File

@@ -52,6 +52,7 @@ local function compile_msoffice_trie(log_obj)
local strs = {}
for ext,pats in pairs(msoffice_patterns) do
for _,pat in ipairs(pats) do
-- These are utf16 strings in fact...
strs[#strs + 1] = '^' ..
table.concat(
fun.totable(
@@ -66,6 +67,7 @@ local function compile_msoffice_trie(log_obj)
strs = {}
for ext,pats in pairs(msoffice_clsids) do
for _,pat in ipairs(pats) do
-- Convert hex to re
local hex_table = {}
for i=1,#pat,2 do
local subc = pat:sub(i, i + 1)
@@ -163,6 +165,30 @@ local function detect_ole_format(input, log_obj)
until directory_offset >= inplen
end


-- Publish the local detect_ole_format function as the module's OLE heuristic
exports.ole_format_heuristic = detect_ole_format

-- Guess a type from what the part object reports about itself, without
-- inspecting the raw content.
-- `part` must provide is_text/get_text, is_image/get_image and
-- is_archive/get_archive (presumably an Rspamd mime part -- confirm with caller).
-- Returns a lowercase extension string plus a fixed weight of 60, or nil when
-- the part is neither text, image nor archive.
exports.mime_part_heuristic = function(part)
  -- Every positive answer from this heuristic carries the same weight
  local weight = 60

  if part:is_text() then
    -- Text parts are split only into HTML and plain text
    local ext = part:get_text():is_html() and 'html' or 'txt'
    return ext, weight
  end

  if part:is_image() then
    -- The image object reports its own type; only the case is normalised
    local image_type = part:get_image():get_type()
    return image_type:lower(), weight
  end

  if part:is_archive() then
    -- TODO: add files heuristics
    local archive_type = part:get_archive():get_type()
    return archive_type:lower(), weight
  end

  -- Nothing recognised: the caller is expected to fall back to other detection
  return nil
end

return exports

+ 11
- 0
lualib/lua_magic/init.lua View File

@@ -21,6 +21,7 @@ limitations under the License.

local patterns = require "lua_magic/patterns"
local types = require "lua_magic/types"
local heuristics = require "lua_magic/heuristics"
local fun = require "fun"
local lua_util = require "lua_util"

@@ -317,6 +318,16 @@ exports.detect = function(input, log_obj)
return nil
end

-- Detect the type of a mime part.
-- The part-level heuristic is tried first; its answer is accepted only when it
-- yields both an extension and a weight above 20. Otherwise the result of
-- exports.detect on the part's content is returned unchanged.
-- Returns the extension and the matching entry of the `types` table (the entry
-- is nil when the extension is not listed there).
exports.detect_mime_part = function(part, log_obj)
  local ext, weight = heuristics.mime_part_heuristic(part)
  local confident = ext and weight and weight > 20

  if not confident then
    -- Heuristic gave no usable answer: run detection over the part content
    return exports.detect(part:get_content(), log_obj)
  end

  return ext, types[ext]
end

-- Size, in bytes, of a single chunk of the input that is checked.
-- Rspamd checks 2 chunks at the start and 1 chunk at the end of the input.
exports.chunk_size = 32768

+ 44
- 0
lualib/lua_magic/types.lua View File

@@ -168,6 +168,50 @@ local types = {
ct = 'application/x-uuencoded',
type = 'binary',
},
-- Types that are detected by Rspamd itself
-- Archives
zip = {
ct = 'application/zip',
type = 'archive',
},
rar = {
ct = 'application/x-rar',
type = 'archive',
},
['7z'] = {
  -- A content type needs the full `type/subtype` form; the bare subtype
  -- 'x-7z-compressed' was missing the `application/` prefix that the other
  -- archive entries in this table carry.
  ct = 'application/x-7z-compressed',
  type = 'archive',
},
gz = {
ct = 'application/gzip',
type = 'archive',
},
-- Images
png = {
ct = 'image/png',
type = 'image',
},
gif = {
ct = 'image/gif',
type = 'image',
},
jpg = {
ct = 'image/jpeg',
type = 'image',
},
bmp = {
type = 'image',
ct = 'image/bmp',
},
-- Text
txt = {
type = 'text',
ct = 'text/plain',
},
html = {
type = 'text',
ct = 'text/html',
},
}

return types

Loading…
Cancel
Save