diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-09-06 12:28:39 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-09-06 12:28:39 +0100 |
commit | 786faec3794563dd8a1fb503695d50797cc2bffa (patch) | |
tree | 35660f12375880b930a55442dc76ac3a33007312 /lualib/lua_magic/init.lua | |
parent | d3360f80fc68af9c486ec66bd77c2c8723944058 (diff) | |
download | rspamd-786faec3794563dd8a1fb503695d50797cc2bffa.tar.gz rspamd-786faec3794563dd8a1fb503695d50797cc2bffa.zip |
[Project] Lua_magic: Implement chunks based scan
Diffstat (limited to 'lualib/lua_magic/init.lua')
-rw-r--r-- | lualib/lua_magic/init.lua | 35 |
1 files changed, 31 insertions, 4 deletions
diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua index 464a10d0a..e8629eeda 100644 --- a/lualib/lua_magic/init.lua +++ b/lualib/lua_magic/init.lua @@ -24,6 +24,7 @@ local types = require "lua_magic/types" local fun = require "fun" local lua_util = require "lua_util" +local rspamd_text = require "rspamd_text" local rspamd_trie = require "rspamd_trie" local N = "lua_magic" @@ -55,9 +56,7 @@ local function process_patterns() end end -exports.detect = function(input, log_obj) - process_patterns() - local res = {} +local function match_chunk(input, offset, log_obj, res) local matches = compiled_patterns:match(input) if not log_obj then log_obj = rspamd_config end @@ -106,7 +105,7 @@ exports.detect = function(input, log_obj) local position = match.position for _,pos in ipairs(matched_positions) do - if match_position(pos, position) then + if match_position(pos + offset, position) then add_result(match, pattern) end end @@ -122,6 +121,30 @@ exports.detect = function(input, log_obj) end end end +end +exports.detect = function(input, log_obj) + process_patterns() + local res = {} + + if type(input) == 'string' then + -- Convert to rspamd_text + input = rspamd_text.fromstring(input) + end + + if type(input) == 'userdata' and #input > exports.chunk_size * 3 then + -- Split by chunks + local chunk1, chunk2, chunk3 = + input:span(1, exports.chunk_size), + input:span(exports.chunk_size, exports.chunk_size), + input:span(#input - exports.chunk_size, exports.chunk_size) + local offset1, offset2, offset3 = 0, exports.chunk_size, #input - exports.chunk_size + + match_chunk(chunk1, offset1, log_obj, res) + match_chunk(chunk2, offset2, log_obj, res) + match_chunk(chunk3, offset3, log_obj, res) + else + match_chunk(input, 0, log_obj, res) + end local extensions = lua_util.keys(res) @@ -137,4 +160,8 @@ exports.detect = function(input, log_obj) return nil end +-- This parameter specifies how many bytes are checked in the input +-- Rspamd checks 2 chunks at start and 1 chunk at the end +exports.chunk_size = 16384 + return exports
\ No newline at end of file |