From d3360f80fc68af9c486ec66bd77c2c8723944058 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 6 Sep 2019 12:28:20 +0100 Subject: [Minor] Lua_trie: Fix match for lua_text --- src/lua/lua_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lua/lua_trie.c b/src/lua/lua_trie.c index b030c735a..82d11c50c 100644 --- a/src/lua/lua_trie.c +++ b/src/lua/lua_trie.c @@ -290,7 +290,7 @@ lua_trie_match (lua_State *L) } } else if (lua_type (L, 2) == LUA_TUSERDATA) { - t = lua_check_text (L, -1); + t = lua_check_text (L, 2); if (t && lua_trie_search_str (L, trie, t->start, t->len, cb)) { found = TRUE; -- cgit v1.2.3 From 786faec3794563dd8a1fb503695d50797cc2bffa Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 6 Sep 2019 12:28:39 +0100 Subject: [Project] Lua_magic: Implement chunks based scan --- lualib/lua_magic/init.lua | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua index 464a10d0a..e8629eeda 100644 --- a/lualib/lua_magic/init.lua +++ b/lualib/lua_magic/init.lua @@ -24,6 +24,7 @@ local types = require "lua_magic/types" local fun = require "fun" local lua_util = require "lua_util" +local rspamd_text = require "rspamd_text" local rspamd_trie = require "rspamd_trie" local N = "lua_magic" @@ -55,9 +56,7 @@ local function process_patterns() end end -exports.detect = function(input, log_obj) - process_patterns() - local res = {} +local function match_chunk(input, offset, log_obj, res) local matches = compiled_patterns:match(input) if not log_obj then log_obj = rspamd_config end @@ -106,7 +105,7 @@ exports.detect = function(input, log_obj) local position = match.position for _,pos in ipairs(matched_positions) do - if match_position(pos, position) then + if match_position(pos + offset, position) then add_result(match, pattern) end end @@ -122,6 +121,30 @@ exports.detect = function(input, log_obj) end end end +end +exports.detect = function(input, log_obj) + process_patterns() + local res = {} + + if type(input) == 'string' then + -- Convert to rspamd_text + input = rspamd_text.fromstring(input) + end + + if type(input) == 'userdata' and #input > exports.chunk_size * 3 then + -- Split by chunks + local chunk1, chunk2, chunk3 = + input:span(1, exports.chunk_size), + input:span(exports.chunk_size, exports.chunk_size), + input:span(#input - exports.chunk_size, exports.chunk_size) + local offset1, offset2, offset3 = 0, exports.chunk_size, #input - exports.chunk_size + + match_chunk(chunk1, offset1, log_obj, res) + match_chunk(chunk2, offset2, log_obj, res) + match_chunk(chunk3, offset3, log_obj, res) + else + match_chunk(input, 0, log_obj, res) + end local extensions = lua_util.keys(res) @@ -137,4 +160,8 @@ exports.detect = function(input, log_obj) return nil end +-- This parameter specifies how many bytes are checked in the input +-- Rspamd checks 2 chunks at start and 1 chunk at the end +exports.chunk_size = 16384 + return exports \ No newline at end of file -- cgit v1.2.3 From 5aed65dc5cd6ac78f39e1c4ff4e9471ab434181e Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 6 Sep 2019 12:39:21 +0100 Subject: [Project] Lua_magic: Support hex patterns --- lualib/lua_magic/init.lua | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua index e8629eeda..1ba899b06 100644 --- a/lualib/lua_magic/init.lua +++ b/lualib/lua_magic/init.lua @@ -42,6 +42,16 @@ local function process_patterns() processed_patterns[#processed_patterns + 1] = { match.string, match, pattern } + elseif match.hex then + local hex_table = {} + + for i=1,#match.hex,2 do + local subc = match.hex:sub(i, i + 1) + hex_table[#hex_table + 1] = string.format('\\x{%s}', subc) + end + processed_patterns[#processed_patterns + 1] = { + table.concat(hex_table), match, pattern + } end end end -- cgit v1.2.3 From 055640c105492d4aaa8a75f973ce208ffd8cc045 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 6 Sep 2019 14:05:31 +0100 Subject: [Project] Lua_magic: Improve short patterns performance --- lualib/lua_magic/init.lua | 131 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 97 insertions(+), 34 deletions(-) diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua index 1ba899b06..a2b2c9882 100644 --- a/lualib/lua_magic/init.lua +++ b/lualib/lua_magic/init.lua @@ -31,17 +31,43 @@ local N = "lua_magic" local exports = {} -- trie object local compiled_patterns +local compiled_short_patterns -- short patterns -- {, , } indexed by pattern number local processed_patterns = {} +local short_patterns = {} + +local short_match_limit = 128 +local max_short_offset = -1 + +local function process_patterns(log_obj) + -- Add pattern to either short patterns or to normal patterns + local function add_processed(str, match, pattern) + if match.position and type(match.position) == 'number' and + match.position < short_match_limit then + short_patterns[#short_patterns + 1] = { + str, match, pattern + } + + if max_short_offset < match.position then + max_short_offset = match.position + end + else + processed_patterns[#processed_patterns + 1] = { + str, match, pattern + } + end + end -local function process_patterns() if not compiled_patterns then - for _,pattern in ipairs(patterns) do + for ext,pattern in pairs(patterns) do + assert(types[ext]) + pattern.ext = ext for _,match in ipairs(pattern.matches) do if match.string then - processed_patterns[#processed_patterns + 1] = { - match.string, match, pattern - } + if match.relative_position and not match.position then + match.position = match.relative_position + #match.string + end + add_processed(match.string, match, pattern) elseif match.hex then local hex_table = {} @@ -49,9 +75,11 @@ local function process_patterns() local subc = match.hex:sub(i, i + 1) hex_table[#hex_table + 1] = string.format('\\x{%s}', subc) end - processed_patterns[#processed_patterns + 1] = { - table.concat(hex_table), match, pattern - } + + if match.relative_position and not match.position then + match.position = match.relative_position + #match.hex / 2 + end + add_processed(table.concat(hex_table), match, pattern) end end end @@ -60,16 +88,19 @@ local function process_patterns() fun.map(function(t) return t[1] end, processed_patterns)), rspamd_trie.flags.re ) + compiled_short_patterns = rspamd_trie.create(fun.totable( + fun.map(function(t) return t[1] end, short_patterns)), + rspamd_trie.flags.re + ) - lua_util.debugm(N, rspamd_config, 'compiled %s patterns', - #processed_patterns) + lua_util.debugm(N, log_obj, + 'compiled %s (%s short and %s long) patterns', + #processed_patterns + #short_patterns, #short_patterns, #processed_patterns) end end -local function match_chunk(input, offset, log_obj, res) - local matches = compiled_patterns:match(input) - - if not log_obj then log_obj = rspamd_config end +local function match_chunk(input, offset, trie, processed_tbl, log_obj, res) + local matches = trie:match(input) local function add_result(match, pattern) if not res[pattern.ext] then @@ -86,7 +117,7 @@ local function match_chunk(input, offset, log_obj, res) end for npat,matched_positions in pairs(matches) do - local pat_data = processed_patterns[npat] + local pat_data = processed_tbl[npat] local pattern = pat_data[3] local match = pat_data[2] @@ -132,8 +163,25 @@ local function match_chunk(input, offset, log_obj, res) end end end + +local function process_detected(res) + local extensions = lua_util.keys(res) + + if #extensions > 0 then + table.sort(extensions, function(ex1, ex2) + return res[ex1] > res[ex2] + end) + + return extensions,res[extensions[1]] + end + + return nil +end + exports.detect = function(input, log_obj) - process_patterns() + if not log_obj then log_obj = rspamd_config end + process_patterns(log_obj) + local res = {} if type(input) == 'string' then @@ -141,28 +189,43 @@ exports.detect = function(input, log_obj) input = rspamd_text.fromstring(input) end - if type(input) == 'userdata' and #input > exports.chunk_size * 3 then - -- Split by chunks - local chunk1, chunk2, chunk3 = - input:span(1, exports.chunk_size), - input:span(exports.chunk_size, exports.chunk_size), - input:span(#input - exports.chunk_size, exports.chunk_size) - local offset1, offset2, offset3 = 0, exports.chunk_size, #input - exports.chunk_size - - match_chunk(chunk1, offset1, log_obj, res) - match_chunk(chunk2, offset2, log_obj, res) - match_chunk(chunk3, offset3, log_obj, res) + + if type(input) == 'userdata' then + -- Try short match + local head = input:span(1, math.min(max_short_offset, #input)) + match_chunk(head, 0, compiled_short_patterns, short_patterns, log_obj, res) + + local extensions,confidence = process_detected(res) + + if extensions and #extensions > 0 and confidence > 30 then + -- We are done on short patterns + return extensions[1],types[extensions[1]] + end + + if #input > exports.chunk_size * 3 then + -- Chunked version as input is too long + local chunk1, chunk2, chunk3 = + input:span(1, exports.chunk_size), + input:span(exports.chunk_size, exports.chunk_size), + input:span(#input - exports.chunk_size, exports.chunk_size) + local offset1, offset2, offset3 = 0, exports.chunk_size, #input - exports.chunk_size + + match_chunk(chunk1, offset1, compiled_patterns, processed_patterns, log_obj, res) + match_chunk(chunk2, offset2, compiled_patterns, processed_patterns, log_obj, res) + match_chunk(chunk3, offset3, compiled_patterns, processed_patterns, log_obj, res) + else + -- Input is short enough to match it at all + match_chunk(input, 0, compiled_patterns, processed_patterns, log_obj, res) + end else - match_chunk(input, 0, log_obj, res) + -- Input is a table so just try to match it all... + match_chunk(input, 0, compiled_short_patterns, short_patterns, log_obj, res) + match_chunk(input, 0, compiled_patterns, processed_patterns, log_obj, res) end - local extensions = lua_util.keys(res) - - if #extensions > 0 then - table.sort(extensions, function(ex1, ex2) - return res[ex1] > res[ex2] - end) + local extensions = process_detected(res) + if extensions and #extensions > 0 then return extensions[1],types[extensions[1]] end -- cgit v1.2.3 From 8dffba8ba0717f1f8a4ba9e006ca1f5942decfc2 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 6 Sep 2019 14:06:14 +0100 Subject: [Project] Lua_magic: Add more file types --- lualib/lua_magic/patterns.lua | 144 ++++++++++++++++++++++++++++++++++++++++-- lualib/lua_magic/types.lua | 59 ++++++++++++++++- 2 files changed, 196 insertions(+), 7 deletions(-) diff --git a/lualib/lua_magic/patterns.lua b/lualib/lua_magic/patterns.lua index 354f8ec61..a52baa790 100644 --- a/lualib/lua_magic/patterns.lua +++ b/lualib/lua_magic/patterns.lua @@ -20,14 +20,12 @@ limitations under the License. --]] local patterns = { - { - -- MSDOS extension to match types table - ext = 'pdf', + pdf = { -- These are alternatives matches = { { string = [[%PDF-\d]], - position = 6, -- must be end of the match, as that's how hyperscan works + position = 6, -- must be end of the match, as that's how hyperscan works (or use relative_position) weight = 60, }, { @@ -41,6 +39,144 @@ local patterns = { weight = 60, }, }, + }, + ps = { + matches = { + { + string = [[%!PS-Adobe]], + relative_position = 0, + weight = 60, + }, + }, + }, + -- RTF document + rtf = { + matches = { + { + string = [[{\\rtf\d]], + position = 6, + weight = 60, + } + } + }, + chm = { + matches = { + { + string = [[ITSF]], + relative_position = 0, + weight = 60, + } + } + }, + djvu = { + matches = { + { + string = [[AT&TFORM]], + relative_position = 0, + weight = 60, + }, + { + string = [[DJVM]], + relative_position = 0x0c, + weight = 60, + } + } + }, + -- MS Exe file + exe = { + matches = { + { + string = [[MZ]], + relative_position = 0, + weight = 10, + }, + -- PE part + { + string = [[PE\x{00}\x{00}]], + position = {'>=', 0x3c + 4}, + weight = 40, + } + } + }, + -- Archives + arj = { + matches = { + { + hex = '60EA', + relative_position = 0, + weight = 60, + }, + } + }, + ace = { + matches = { + { + string = [[\*\*ACE\*\*]], + position = 14, + weight = 60, + }, + } + }, + cab = { + matches = { + { + string = [[MSCF]], + relative_position = 0, + weight = 60, + }, + } + }, + -- Images + psd = { + matches = { + { + string = [[8BPS]], + relative_position = 0, + weight = 60, + }, + } + }, + ico = { + matches = { + { + hex = [[00000100]], + relative_position = 0, + weight = 60, + }, + } + }, + pcx = { + matches = { + { + hex = [[0A050108]], + relative_position = 0, + weight = 60, + }, + } + }, + pic = { + matches = { + { + hex = [[FF80C9C71A00]], + relative_position = 0, + weight = 60, + }, + } + }, + -- Other + pgp = { + matches = { + { + hex = [[A803504750]], + relative_position = 0, + weight = 60, + }, + { + hex = [[2D424547494E20504750204D4553534147452D]], + relative_position = 0, + weight = 60, + }, + } } } diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua index 746c87400..667296715 100644 --- a/lualib/lua_magic/types.lua +++ b/lualib/lua_magic/types.lua @@ -22,17 +22,70 @@ limitations under the License. -- This table is indexed by msdos extension for convenience local types = { + -- exe + exe = { + ct = 'application/x-ms-application', + type = 'executable', + }, + -- text + rtf = { + ct = "application/rtf", + type = 'text', + }, pdf = { ct = 'application/pdf', type = 'binary', }, - exe = { - ct = 'application/x-ms-application', - type = 'executable', + ps = { + ct = 'application/postscript', + type = 'binary', + }, + chm = { + ct = 'application/chm', + type = 'binary', + }, + djvu = { + ct = 'application/djvu', + type = 'binary', + }, + -- archives + arj = { + ct = 'application/x-compressed', + type = 'archive', + }, + cab = { + ct = 'application/x-compressed', + type = 'archive', + }, + ace = { + ct = 'application/x-compressed', + type = 'archive', + }, + -- images + psd = { + ct = 'image/psd', + type = 'image', + }, + pcx = { + ct = 'image/pcx', + type = 'image', + }, + pic = { + ct = 'image/pic', + type = 'image', }, tiff = { ct = 'image/tiff', type = 'image', + }, + ico = { + ct = 'image/ico', + type = 'image', + }, + -- other + pgp = { + ct = 'application/encrypted', + type = 'encrypted' } } -- cgit v1.2.3 From bd13783018884a90571d1e94754e8bbf81369b82 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 6 Sep 2019 17:14:47 +0100 Subject: [Project] Lua_magic: Support tail patterns --- lualib/lua_magic/init.lua | 102 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 26 deletions(-) diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua index a2b2c9882..4ecc66afa 100644 --- a/lualib/lua_magic/init.lua +++ b/lualib/lua_magic/init.lua @@ -29,27 +29,39 @@ local rspamd_trie = require "rspamd_trie" local N = "lua_magic" local exports = {} --- trie object +-- trie objects local compiled_patterns -local compiled_short_patterns -- short patterns +local compiled_short_patterns +local compiled_tail_patterns -- {, , } indexed by pattern number local processed_patterns = {} local short_patterns = {} +local tail_patterns = {} local short_match_limit = 128 local max_short_offset = -1 +local min_tail_offset = math.huge local function process_patterns(log_obj) -- Add pattern to either short patterns or to normal patterns local function add_processed(str, match, pattern) - if match.position and type(match.position) == 'number' and - match.position < short_match_limit then - short_patterns[#short_patterns + 1] = { - str, match, pattern - } + if match.position and type(match.position) == 'number' then + if match.tail then + -- Tail pattern + tail_patterns[#tail_patterns + 1] = { + str, match, pattern + } + if min_tail_offset > match.tail then + min_tail_offset = match.tail + end + elseif match.position < short_match_limit then + short_patterns[#short_patterns + 1] = { + str, match, pattern + } - if max_short_offset < match.position then - max_short_offset = match.position + if max_short_offset < match.position then + max_short_offset = match.position + end end else processed_patterns[#processed_patterns + 1] = { @@ -92,15 +104,21 @@ local function process_patterns(log_obj) fun.map(function(t) return t[1] end, short_patterns)), rspamd_trie.flags.re ) + compiled_tail_patterns = rspamd_trie.create(fun.totable( + fun.map(function(t) return t[1] end, tail_patterns)), + rspamd_trie.flags.re + ) lua_util.debugm(N, log_obj, - 'compiled %s (%s short and %s long) patterns', - #processed_patterns + #short_patterns, #short_patterns, #processed_patterns) + 'compiled %s (%s short; %s long; %s tail) patterns', + #processed_patterns + #short_patterns + #tail_patterns, + #short_patterns, #processed_patterns, #tail_patterns) end end -local function match_chunk(input, offset, trie, processed_tbl, log_obj, res) +local function match_chunk(input, tlen, offset, trie, processed_tbl, log_obj, res) local matches = trie:match(input) + local last = tlen local function add_result(match, pattern) if not res[pattern.ext] then @@ -139,6 +157,11 @@ local function match_chunk(input, offset, trie, processed_tbl, log_obj, res) expected = expected[2] end + -- Tail match + if expected < 0 then + expected = last + expected + 1 + end + return cmp(pos, expected) end -- Single position @@ -146,19 +169,33 @@ local function match_chunk(input, offset, trie, processed_tbl, log_obj, res) local position = match.position for _,pos in ipairs(matched_positions) do + lua_util.debugm(N, log_obj, 'found match %s at offset %s(from %s)', + pattern.ext, pos, offset) if match_position(pos + offset, position) then add_result(match, pattern) + break end end end -- Match all positions if match.positions then + local all_right = true for _,position in ipairs(match.positions) do + local matched = false for _,pos in ipairs(matched_positions) do - if match_position(pos, position) then - add_result(match, pattern) + if not match_position(pos + offset, position) then + matched = true + break end end + if not matched then + all_right = false + break + end + end + + if all_right then + add_result(match, pattern) end end end @@ -191,10 +228,21 @@ exports.detect = function(input, log_obj) if type(input) == 'userdata' then + local inplen = #input + + -- Check tail matches + if inplen > min_tail_offset then + local tail = input:span(inplen - min_tail_offset, min_tail_offset) + match_chunk(tail, inplen, inplen - min_tail_offset, + compiled_tail_patterns, tail_patterns, log_obj, res) + end + -- Try short match - local head = input:span(1, math.min(max_short_offset, #input)) - match_chunk(head, 0, compiled_short_patterns, short_patterns, log_obj, res) + local head = input:span(1, math.min(max_short_offset, inplen)) + match_chunk(head, inplen, 0, + compiled_short_patterns, short_patterns, log_obj, res) + -- Check if we have enough data or go to long patterns local extensions,confidence = process_detected(res) if extensions and #extensions > 0 and confidence > 30 then @@ -207,20 +255,22 @@ exports.detect = function(input, log_obj) local chunk1, chunk2, chunk3 = input:span(1, exports.chunk_size), input:span(exports.chunk_size, exports.chunk_size), - input:span(#input - exports.chunk_size, exports.chunk_size) - local offset1, offset2, offset3 = 0, exports.chunk_size, #input - exports.chunk_size - - match_chunk(chunk1, offset1, compiled_patterns, processed_patterns, log_obj, res) - match_chunk(chunk2, offset2, compiled_patterns, processed_patterns, log_obj, res) - match_chunk(chunk3, offset3, compiled_patterns, processed_patterns, log_obj, res) + input:span(inplen - exports.chunk_size, exports.chunk_size) + local offset1, offset2, offset3 = 0, exports.chunk_size, inplen - exports.chunk_size + + match_chunk(chunk1, inplen, + offset1, compiled_patterns, processed_patterns, log_obj, res) + match_chunk(chunk2, inplen, + offset2, compiled_patterns, processed_patterns, log_obj, res) + match_chunk(chunk3, inplen, + offset3, compiled_patterns, processed_patterns, log_obj, res) else -- Input is short enough to match it at all - match_chunk(input, 0, compiled_patterns, processed_patterns, log_obj, res) + match_chunk(input, inplen, 0, + compiled_patterns, processed_patterns, log_obj, res) end else - -- Input is a table so just try to match it all... - match_chunk(input, 0, compiled_short_patterns, short_patterns, log_obj, res) - match_chunk(input, 0, compiled_patterns, processed_patterns, log_obj, res) + assert(0) end local extensions = process_detected(res) -- cgit v1.2.3 From c9af91f7ecf9d7a39ac2e07dbc7168462ab24de8 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 6 Sep 2019 18:03:09 +0100 Subject: [Project] Lua_magic: Some tweaks --- lualib/lua_magic/init.lua | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua index 4ecc66afa..2dbd24da5 100644 --- a/lualib/lua_magic/init.lua +++ b/lualib/lua_magic/init.lua @@ -72,7 +72,7 @@ local function process_patterns(log_obj) if not compiled_patterns then for ext,pattern in pairs(patterns) do - assert(types[ext]) + assert(types[ext], 'not found type: ' .. ext) pattern.ext = ext for _,match in ipairs(pattern.matches) do if match.string then @@ -250,6 +250,7 @@ exports.detect = function(input, log_obj) return extensions[1],types[extensions[1]] end + -- No way, let's check data in chunks or just the whole input if it is small enough if #input > exports.chunk_size * 3 then -- Chunked version as input is too long local chunk1, chunk2, chunk3 = @@ -270,7 +271,8 @@ exports.detect = function(input, log_obj) compiled_patterns, processed_patterns, log_obj, res) end else - assert(0) + -- Table input is NYI + assert(0, 'table input for match') end local extensions = process_detected(res) -- cgit v1.2.3 From 0d5d24b15877bce801381da5b161d62a4ef0bae9 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 6 Sep 2019 18:03:19 +0100 Subject: [Project] Lua_magic: Moar patterns --- lualib/lua_magic/patterns.lua | 151 +++++++++++++++++++++++++++++++++++++++++- lualib/lua_magic/types.lua | 58 ++++++++++++++-- 2 files changed, 202 insertions(+), 7 deletions(-) diff --git a/lualib/lua_magic/patterns.lua b/lualib/lua_magic/patterns.lua index a52baa790..dd723f6e8 100644 --- a/lualib/lua_magic/patterns.lua +++ b/lualib/lua_magic/patterns.lua @@ -98,6 +98,36 @@ local patterns = { } } }, + elf = { + matches = { + { + hex = [[7f454c46]], + relative_position = 0, + weight = 60, + }, + } + }, + lnk = { + matches = { + { + hex = [[4C0000000114020000000000C000000000000046]], + relative_position = 0, + weight = 60, + }, + } + }, + class = { + -- Technically, this also matches MachO files, but I don't care about + -- Apple and their mental health problems here: just consider Java files, + -- Mach object files and all other cafe babes as bad and block them! + matches = { + { + hex = [[cafebabe]], + relative_position = 0, + weight = 60, + }, + } + }, -- Archives arj = { matches = { @@ -120,7 +150,82 @@ local patterns = { cab = { matches = { { - string = [[MSCF]], + hex = [[4d53434600000000]], -- Can be anywhere for SFX :( + position = {'>=', 8}, + weight = 60, + }, + } + }, + tar = { + matches = { + { + string = [[ustar]], + relative_position = 257, + weight = 60, + }, + } + }, + bz2 = { + matches = { + { + string = "BZ[h0]", + position = 3, + weight = 60, + }, + } + }, + lz4 = { + matches = { + { + hex = "184d2204", + relative_position = 0, + weight = 60, + }, + { + hex = "184c2103", + relative_position = 0, + weight = 60, + }, + { + hex = "184c2102", + relative_position = 0, + weight = 60, + }, + } + }, + zst = { + matches = { + { + string = [[\x{FD}\x{2F}\x{B5}[\x{22}-\x{40}].]], + position = 5, -- includes last . + weight = 60, + }, + } + }, + -- Apple is a 'special' child: this needs to be matched at the data tail... + dmg = { + matches = { + { + string = [[koly]], + position = -512 + 4, + weight = 61, + tail = 512, + }, + } + }, + szdd = { + matches = { + { + hex = [[535a4444]], + relative_position = 0, + weight = 60, + }, + } + }, + xz = { + matches = { + { + hex = [[FD377A585A00]], relative_position = 0, weight = 60, }, @@ -163,6 +268,39 @@ local patterns = { }, } }, + swf = { + matches = { + { + hex = [[5a5753]], -- LZMA + relative_position = 0, + weight = 60, + }, + { + hex = [[435753]], -- Zlib + relative_position = 0, + weight = 60, + }, + { + hex = [[465753]], -- Uncompressed + relative_position = 0, + weight = 60, + }, + } + }, + tiff = { + matches = { + { + hex = [[49492a00]], -- LE encoded + relative_position = 0, + weight = 60, + }, + { + hex = [[4d4d]], -- BE tiff + relative_position = 0, + weight = 60, + }, + } + }, -- Other pgp = { matches = { @@ -177,7 +315,16 @@ local patterns = { weight = 60, }, } - } + }, + uue = { + matches = { + { + hex = [[626567696e20]], + relative_position = 0, + weight = 60, + }, + } + }, } return patterns \ No newline at end of file diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua index 667296715..20089c9c2 100644 --- a/lualib/lua_magic/types.lua +++ b/lualib/lua_magic/types.lua @@ -27,6 +27,18 @@ local types = { ct = 'application/x-ms-application', type = 'executable', }, + elf = { + ct = 'application/x-elf-executable', + type = 'executable', + }, + lnk = { + ct = 'application/x-ms-application', + type = 'executable', + }, + class = { + ct = 'application/x-java-applet', + type = 'executable', + }, -- text rtf = { ct = "application/rtf", @@ -41,23 +53,51 @@ local types = { type = 'binary', }, chm = { - ct = 'application/chm', + ct = 'application/x-chm', type = 'binary', }, djvu = { - ct = 'application/djvu', + ct = 'application/x-djvu', type = 'binary', }, -- archives arj = { - ct = 'application/x-compressed', + ct = 'application/x-arj', type = 'archive', }, cab = { - ct = 'application/x-compressed', + ct = 'application/x-cab', type = 'archive', }, ace = { + ct = 'application/x-ace', + type = 'archive', + }, + tar = { + ct = 'application/x-tar', + type = 'archive', + }, + bz2 = { + ct = 'application/x-bzip', + type = 'archive', + }, + xz = { + ct = 'application/x-xz', + type = 'archive', + }, + lz4 = { + ct = 'application/x-lz4', + type = 'archive', + }, + zst = { + ct = 'application/x-zstandard', + type = 'archive', + }, + dmg = { + ct = 'application/x-dmg', + type = 'archive', + }, + szdd = { -- in fact, their MSDOS extension is like FOO.TX_ or FOO.TX$ ct = 'application/x-compressed', type = 'archive', }, @@ -82,11 +122,19 @@ local types = { ct = 'image/ico', type = 'image', }, + swf = { + ct = 'application/x-shockwave-flash', + type = 'image', + }, -- other pgp = { ct = 'application/encrypted', type = 'encrypted' - } + }, + uue = { + ct = 'application/x-uuencoded', + type = 'binary', + }, } return types \ No newline at end of file -- cgit v1.2.3 From dcd82d5c4f99472e3c84743ae9cf5d30ef4c0c12 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 6 Sep 2019 18:43:42 +0100 Subject: [Project] Lua_magic: Fix some cases --- lualib/lua_magic/init.lua | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua index 2dbd24da5..5a4154c79 100644 --- a/lualib/lua_magic/init.lua +++ b/lualib/lua_magic/init.lua @@ -54,19 +54,34 @@ local function process_patterns(log_obj) if min_tail_offset > match.tail then min_tail_offset = match.tail end + + lua_util.debugm(N, log_obj, 'add tail pattern %s for ext %s', + str, pattern.ext) elseif match.position < short_match_limit then short_patterns[#short_patterns + 1] = { str, match, pattern } + lua_util.debugm(N, log_obj, 'add short pattern %s for ext %s', + str, pattern.ext) if max_short_offset < match.position then max_short_offset = match.position end + else + processed_patterns[#processed_patterns + 1] = { + str, match, pattern + } + + lua_util.debugm(N, log_obj, 'add long pattern %s for ext %s', + str, pattern.ext) end else processed_patterns[#processed_patterns + 1] = { str, match, pattern } + + lua_util.debugm(N, log_obj, 'add long pattern %s for ext %s', + str, pattern.ext) end end @@ -118,6 +133,7 @@ end local function match_chunk(input, tlen, offset, trie, processed_tbl, log_obj, res) local matches = trie:match(input) + local last = tlen local function add_result(match, pattern) @@ -161,7 +177,6 @@ local function match_chunk(input, tlen, offset, trie, processed_tbl, log_obj, re if expected < 0 then expected = last + expected + 1 end - return cmp(pos, expected) end -- Single position @@ -253,18 +268,15 @@ exports.detect = function(input, log_obj) -- No way, let's check data in chunks or just the whole input if it is small enough if #input > exports.chunk_size * 3 then -- Chunked version as input is too long - local chunk1, chunk2, chunk3 = - input:span(1, exports.chunk_size), - input:span(exports.chunk_size, exports.chunk_size), + local chunk1, chunk2 = + input:span(1, exports.chunk_size * 2), input:span(inplen - exports.chunk_size, exports.chunk_size) - local offset1, offset2, offset3 = 0, exports.chunk_size, inplen - exports.chunk_size + local offset1, offset2 = 0, inplen - exports.chunk_size match_chunk(chunk1, inplen, offset1, compiled_patterns, processed_patterns, log_obj, res) match_chunk(chunk2, inplen, offset2, compiled_patterns, processed_patterns, log_obj, res) - match_chunk(chunk3, inplen, - offset3, compiled_patterns, processed_patterns, log_obj, res) else -- Input is short enough to match it at all match_chunk(input, inplen, 0, @@ -287,6 +299,6 @@ end -- This parameter specifies how many bytes are checked in the input -- Rspamd checks 2 chunks at start and 1 chunk at the end -exports.chunk_size = 16384 +exports.chunk_size = 32768 return exports \ No newline at end of file -- cgit v1.2.3 From 859483618be3602ea3c405f944595f8c0d06e720 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 6 Sep 2019 18:44:02 +0100 Subject: [Project] Lua_magic: Moar patterns --- lualib/lua_magic/patterns.lua | 9 +++++++++ lualib/lua_magic/types.lua | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/lualib/lua_magic/patterns.lua b/lualib/lua_magic/patterns.lua index dd723f6e8..003073cab 100644 --- a/lualib/lua_magic/patterns.lua +++ b/lualib/lua_magic/patterns.lua @@ -202,6 +202,15 @@ local patterns = { }, } }, + iso = { + matches = { + { + string = [[\x{01}CD001\x{01}]], + position = {'>=', 0x8000 + 7}, -- first 32k is unused + weight = 60, + }, + } + }, -- Apple is a 'special' child: this needs to be matched at the data tail... dmg = { matches = { diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua index 20089c9c2..b3af668c8 100644 --- a/lualib/lua_magic/types.lua +++ b/lualib/lua_magic/types.lua @@ -97,6 +97,10 @@ local types = { ct = 'application/x-dmg', type = 'archive', }, + iso = { + ct = 'application/x-iso', + type = 'archive', + }, szdd = { -- in fact, their MSDOS extension is like FOO.TX_ or FOO.TX$ ct = 'application/x-compressed', type = 'archive', -- cgit v1.2.3