diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-09-05 15:29:35 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-09-05 15:29:35 +0100 |
commit | 8748c908b407a787ed5aab23cd0cf78f8d1ae4be (patch) | |
tree | a04490552fdd9035e21686b20e9d69200e00b0b6 /lualib | |
parent | f2b60f9e335b8890ed079c2c6b56b31d6d98e200 (diff) | |
download | rspamd-8748c908b407a787ed5aab23cd0cf78f8d1ae4be.tar.gz rspamd-8748c908b407a787ed5aab23cd0cf78f8d1ae4be.zip |
[Project] Lua_magic: Start new magic detection library
Diffstat (limited to 'lualib')
-rw-r--r-- | lualib/lua_magic/init.lua | 140 | ||||
-rw-r--r-- | lualib/lua_magic/patterns.lua | 47 | ||||
-rw-r--r-- | lualib/lua_magic/types.lua | 39 |
3 files changed, 226 insertions, 0 deletions
diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua new file mode 100644 index 000000000..464a10d0a --- /dev/null +++ b/lualib/lua_magic/init.lua @@ -0,0 +1,140 @@ +--[[ +Copyright (c) 2019, Vsevolod Stakhov <vsevolod@highsecure.ru> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +--[[[ +-- @module lua_magic +-- This module contains file types detection logic +--]] + +local patterns = require "lua_magic/patterns" +local types = require "lua_magic/types" +local fun = require "fun" +local lua_util = require "lua_util" + +local rspamd_trie = require "rspamd_trie" + +local N = "lua_magic" +local exports = {} +-- trie object +local compiled_patterns +-- {<str>, <match_object>, <pattern_object>} indexed by pattern number +local processed_patterns = {} + +local function process_patterns() + if not compiled_patterns then + for _,pattern in ipairs(patterns) do + for _,match in ipairs(pattern.matches) do + if match.string then + processed_patterns[#processed_patterns + 1] = { + match.string, match, pattern + } + end + end + end + + compiled_patterns = rspamd_trie.create(fun.totable( + fun.map(function(t) return t[1] end, processed_patterns)), + rspamd_trie.flags.re + ) + + lua_util.debugm(N, rspamd_config, 'compiled %s patterns', + #processed_patterns) + end +end + +exports.detect = function(input, log_obj) + process_patterns() + local res = {} + local matches = compiled_patterns:match(input) + + if not log_obj then log_obj = rspamd_config end + + local function add_result(match, pattern) + if not res[pattern.ext] then + res[pattern.ext] = 0 + end + if match.weight then + res[pattern.ext] = res[pattern.ext] + match.weight + else + res[pattern.ext] = res[pattern.ext] + 1 + end + + lua_util.debugm(N, log_obj,'add pattern for %s, weight %s, total weight %s', + pattern.ext, match.weight, res[pattern.ext]) + end + + for npat,matched_positions in pairs(matches) do + local pat_data = processed_patterns[npat] + local pattern = pat_data[3] + local match = pat_data[2] + + local function match_position(pos, expected) + local cmp = function(a, b) return a == b end + if type(expected) == 'table' then + -- Something like {'>', 0} + if expected[1] == '>' then + cmp = function(a, b) return a > b end + elseif expected[1] == '>=' then + cmp = function(a, b) return a >= b end + elseif expected[1] == '<' then + cmp = function(a, b) return a < b end + elseif expected[1] == '<=' then + cmp = function(a, b) return a <= b end + elseif expected[1] == '!=' then + cmp = function(a, b) return a ~= b end + end + expected = expected[2] + end + + return cmp(pos, expected) + end + -- Single position + if match.position then + local position = match.position + + for _,pos in ipairs(matched_positions) do + if match_position(pos, position) then + add_result(match, pattern) + end + end + end + -- Match all positions + if match.positions then + for _,position in ipairs(match.positions) do + for _,pos in ipairs(matched_positions) do + if match_position(pos, position) then + add_result(match, pattern) + end + end + end + end + end + + local extensions = lua_util.keys(res) + + if #extensions > 0 then + table.sort(extensions, function(ex1, ex2) + return res[ex1] > res[ex2] + end) + + return extensions[1],types[extensions[1]] + end + + -- Nothing found + return nil +end + +return exports
\ No newline at end of file diff --git a/lualib/lua_magic/patterns.lua b/lualib/lua_magic/patterns.lua new file mode 100644 index 000000000..354f8ec61 --- /dev/null +++ b/lualib/lua_magic/patterns.lua @@ -0,0 +1,47 @@ +--[[ +Copyright (c) 2019, Vsevolod Stakhov <vsevolod@highsecure.ru> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +--[[[ +-- @module lua_magic/patterns +-- This module contains most common patterns +--]] + +local patterns = { + { + -- MSDOS extension to match types table + ext = 'pdf', + -- These are alternatives + matches = { + { + string = [[%PDF-\d]], + position = 6, -- must be end of the match, as that's how hyperscan works + weight = 60, + }, + { + string = [[\012%PDF-\d]], + position = 7, + weight = 60, + }, + { + string = [[%FDF-\d]], + position = 6, + weight = 60, + }, + }, + } +} + +return patterns
\ No newline at end of file diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua new file mode 100644 index 000000000..746c87400 --- /dev/null +++ b/lualib/lua_magic/types.lua @@ -0,0 +1,39 @@ +--[[ +Copyright (c) 2019, Vsevolod Stakhov <vsevolod@highsecure.ru> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +--[[[ +-- @module lua_magic/patterns +-- This module contains types definitions +--]] + +-- This table is indexed by msdos extension for convenience + +local types = { + pdf = { + ct = 'application/pdf', + type = 'binary', + }, + exe = { + ct = 'application/x-ms-application', + type = 'executable', + }, + tiff = { + ct = 'image/tiff', + type = 'image', + } +} + +return types
\ No newline at end of file |