aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkorgoth1 <vladislav.stakhov@gmail.com>2019-09-08 19:40:00 +0300
committerkorgoth1 <vladislav.stakhov@gmail.com>2019-09-08 19:40:00 +0300
commitb38a8298263120c857a95522decd31d09cb42504 (patch)
tree0786d34a6eadbde5c0e5e133c582ae0eb31d93c8
parent2b9ee5fee2c7beb9001a6df1b68a11ec724c966d (diff)
parent859483618be3602ea3c405f944595f8c0d06e720 (diff)
downloadrspamd-b38a8298263120c857a95522decd31d09cb42504.tar.gz
rspamd-b38a8298263120c857a95522decd31d09cb42504.zip
[Test] WHITELIST_SURBL
-rw-r--r--lualib/lua_magic/init.lua198
-rw-r--r--lualib/lua_magic/patterns.lua302
-rw-r--r--lualib/lua_magic/types.lua113
-rw-r--r--src/lua/lua_trie.c2
-rw-r--r--test/functional/cases/340_surbl.robot3
-rw-r--r--test/functional/configs/plugins.conf10
-rw-r--r--test/functional/messages/whitelist.eml2
7 files changed, 600 insertions, 30 deletions
diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua
index 464a10d0a..5a4154c79 100644
--- a/lualib/lua_magic/init.lua
+++ b/lualib/lua_magic/init.lua
@@ -24,23 +24,89 @@ local types = require "lua_magic/types"
local fun = require "fun"
local lua_util = require "lua_util"
+local rspamd_text = require "rspamd_text"
local rspamd_trie = require "rspamd_trie"
local N = "lua_magic"
local exports = {}
--- trie object
+-- trie objects
local compiled_patterns
+local compiled_short_patterns
+local compiled_tail_patterns
-- {<str>, <match_object>, <pattern_object>} indexed by pattern number
local processed_patterns = {}
+local short_patterns = {}
+local tail_patterns = {}
+
+local short_match_limit = 128
+local max_short_offset = -1
+local min_tail_offset = math.huge
+
+local function process_patterns(log_obj)
+ -- Files one pattern triple into the table that fits how it is anchored:
+ -- * numeric position plus a `tail` field -> tail_patterns
+ -- * numeric position below short_match_limit -> short_patterns
+ -- * anything else (including non-numeric positions) -> processed_patterns
+ -- Also keeps min_tail_offset / max_short_offset up to date and logs the choice.
+ local function add_processed(str, match, pattern)
+ local entry = { str, match, pattern }
+ -- Default destination is the general ("long") pattern set
+ local bucket = processed_patterns
+ local msg = 'add long pattern %s for ext %s'
+
+ -- type() of a missing position is 'nil', so one check covers both cases
+ if type(match.position) == 'number' then
+ if match.tail then
+ bucket = tail_patterns
+ msg = 'add tail pattern %s for ext %s'
+ -- Remember the smallest tail window seen so far
+ if match.tail < min_tail_offset then
+ min_tail_offset = match.tail
+ end
+ elseif match.position < short_match_limit then
+ bucket = short_patterns
+ msg = 'add short pattern %s for ext %s'
+ -- Remember the furthest position any short pattern needs
+ if match.position > max_short_offset then
+ max_short_offset = match.position
+ end
+ end
+ end
+
+ bucket[#bucket + 1] = entry
+ lua_util.debugm(N, log_obj, msg, str, pattern.ext)
+ end
-local function process_patterns()
if not compiled_patterns then
- for _,pattern in ipairs(patterns) do
+ for ext,pattern in pairs(patterns) do
+ assert(types[ext], 'not found type: ' .. ext)
+ pattern.ext = ext
for _,match in ipairs(pattern.matches) do
if match.string then
- processed_patterns[#processed_patterns + 1] = {
- match.string, match, pattern
- }
+ if match.relative_position and not match.position then
+ match.position = match.relative_position + #match.string
+ end
+ add_processed(match.string, match, pattern)
+ elseif match.hex then
+ local hex_table = {}
+
+ for i=1,#match.hex,2 do
+ local subc = match.hex:sub(i, i + 1)
+ hex_table[#hex_table + 1] = string.format('\\x{%s}', subc)
+ end
+
+ if match.relative_position and not match.position then
+ match.position = match.relative_position + #match.hex / 2
+ end
+ add_processed(table.concat(hex_table), match, pattern)
end
end
end
@@ -49,18 +115,26 @@ local function process_patterns()
fun.map(function(t) return t[1] end, processed_patterns)),
rspamd_trie.flags.re
)
+ compiled_short_patterns = rspamd_trie.create(fun.totable(
+ fun.map(function(t) return t[1] end, short_patterns)),
+ rspamd_trie.flags.re
+ )
+ compiled_tail_patterns = rspamd_trie.create(fun.totable(
+ fun.map(function(t) return t[1] end, tail_patterns)),
+ rspamd_trie.flags.re
+ )
- lua_util.debugm(N, rspamd_config, 'compiled %s patterns',
- #processed_patterns)
+ lua_util.debugm(N, log_obj,
+ 'compiled %s (%s short; %s long; %s tail) patterns',
+ #processed_patterns + #short_patterns + #tail_patterns,
+ #short_patterns, #processed_patterns, #tail_patterns)
end
end
-exports.detect = function(input, log_obj)
- process_patterns()
- local res = {}
- local matches = compiled_patterns:match(input)
+local function match_chunk(input, tlen, offset, trie, processed_tbl, log_obj, res)
+ local matches = trie:match(input)
- if not log_obj then log_obj = rspamd_config end
+ local last = tlen
local function add_result(match, pattern)
if not res[pattern.ext] then
@@ -77,7 +151,7 @@ exports.detect = function(input, log_obj)
end
for npat,matched_positions in pairs(matches) do
- local pat_data = processed_patterns[npat]
+ local pat_data = processed_tbl[npat]
local pattern = pat_data[3]
local match = pat_data[2]
@@ -99,6 +173,10 @@ exports.detect = function(input, log_obj)
expected = expected[2]
end
+ -- Tail match
+ if expected < 0 then
+ expected = last + expected + 1
+ end
return cmp(pos, expected)
end
-- Single position
@@ -106,23 +184,39 @@ exports.detect = function(input, log_obj)
local position = match.position
for _,pos in ipairs(matched_positions) do
- if match_position(pos, position) then
+ lua_util.debugm(N, log_obj, 'found match %s at offset %s(from %s)',
+ pattern.ext, pos, offset)
+ if match_position(pos + offset, position) then
add_result(match, pattern)
+ break
end
end
end
-- Match all positions
if match.positions then
+ -- Every listed position must be satisfied by at least one hit of this
+ -- pattern; the result is added once, and only if all of them are.
+ local all_right = true
for _,position in ipairs(match.positions) do
+ local matched = false
for _,pos in ipairs(matched_positions) do
- if match_position(pos, position) then
- add_result(match, pattern)
+ -- Shift the hit by the chunk offset before comparing; a hit that
+ -- satisfies the position (not one that fails it) marks it as matched
+ if match_position(pos + offset, position) then
+ matched = true
+ break
end
end
+ if not matched then
+ all_right = false
+ break
+ end
+ end
+
+ if all_right then
+ add_result(match, pattern)
end
end
end
+end
+local function process_detected(res)
local extensions = lua_util.keys(res)
if #extensions > 0 then
@@ -130,6 +224,72 @@ exports.detect = function(input, log_obj)
return res[ex1] > res[ex2]
end)
+ return extensions,res[extensions[1]]
+ end
+
+ return nil
+end
+
+exports.detect = function(input, log_obj)
+ if not log_obj then log_obj = rspamd_config end
+ process_patterns(log_obj)
+
+ local res = {}
+
+ if type(input) == 'string' then
+ -- Convert to rspamd_text
+ input = rspamd_text.fromstring(input)
+ end
+
+
+ if type(input) == 'userdata' then
+ local inplen = #input
+
+ -- Check tail matches
+ if inplen > min_tail_offset then
+ local tail = input:span(inplen - min_tail_offset, min_tail_offset)
+ match_chunk(tail, inplen, inplen - min_tail_offset,
+ compiled_tail_patterns, tail_patterns, log_obj, res)
+ end
+
+ -- Try short match
+ local head = input:span(1, math.min(max_short_offset, inplen))
+ match_chunk(head, inplen, 0,
+ compiled_short_patterns, short_patterns, log_obj, res)
+
+ -- Check if we have enough data or go to long patterns
+ local extensions,confidence = process_detected(res)
+
+ if extensions and #extensions > 0 and confidence > 30 then
+ -- We are done on short patterns
+ return extensions[1],types[extensions[1]]
+ end
+
+ -- No way, let's check data in chunks or just the whole input if it is small enough
+ if #input > exports.chunk_size * 3 then
+ -- Chunked version as input is too long
+ local chunk1, chunk2 =
+ input:span(1, exports.chunk_size * 2),
+ input:span(inplen - exports.chunk_size, exports.chunk_size)
+ local offset1, offset2 = 0, inplen - exports.chunk_size
+
+ match_chunk(chunk1, inplen,
+ offset1, compiled_patterns, processed_patterns, log_obj, res)
+ match_chunk(chunk2, inplen,
+ offset2, compiled_patterns, processed_patterns, log_obj, res)
+ else
+ -- Input is short enough to match it at all
+ match_chunk(input, inplen, 0,
+ compiled_patterns, processed_patterns, log_obj, res)
+ end
+ else
+ -- Table input is NYI: fail loudly. `assert(0, ...)` would never fire
+ -- because 0 is truthy in Lua (only nil and false are falsy).
+ assert(false, 'table input for match')
+ end
+
+ local extensions = process_detected(res)
+
+ if extensions and #extensions > 0 then
return extensions[1],types[extensions[1]]
end
@@ -137,4 +297,8 @@ exports.detect = function(input, log_obj)
return nil
end
+-- This parameter specifies how many bytes are checked in the input
+-- Rspamd checks 2 chunks at start and 1 chunk at the end
+exports.chunk_size = 32768
+
return exports \ No newline at end of file
diff --git a/lualib/lua_magic/patterns.lua b/lualib/lua_magic/patterns.lua
index 354f8ec61..003073cab 100644
--- a/lualib/lua_magic/patterns.lua
+++ b/lualib/lua_magic/patterns.lua
@@ -20,14 +20,12 @@ limitations under the License.
--]]
local patterns = {
- {
- -- MSDOS extension to match types table
- ext = 'pdf',
+ pdf = {
-- These are alternatives
matches = {
{
string = [[%PDF-\d]],
- position = 6, -- must be end of the match, as that's how hyperscan works
+ position = 6, -- must be end of the match, as that's how hyperscan works (or use relative_position)
weight = 60,
},
{
@@ -41,7 +39,301 @@ local patterns = {
weight = 60,
},
},
- }
+ },
+ ps = {
+ matches = {
+ {
+ string = [[%!PS-Adobe]],
+ relative_position = 0,
+ weight = 60,
+ },
+ },
+ },
+ -- RTF document
+ rtf = {
+ matches = {
+ {
+ string = [[{\\rtf\d]],
+ position = 6,
+ weight = 60,
+ }
+ }
+ },
+ chm = {
+ matches = {
+ {
+ string = [[ITSF]],
+ relative_position = 0,
+ weight = 60,
+ }
+ }
+ },
+ djvu = {
+ matches = {
+ {
+ string = [[AT&TFORM]],
+ relative_position = 0,
+ weight = 60,
+ },
+ {
+ string = [[DJVM]],
+ relative_position = 0x0c,
+ weight = 60,
+ }
+ }
+ },
+ -- MS Exe file
+ exe = {
+ matches = {
+ {
+ string = [[MZ]],
+ relative_position = 0,
+ weight = 10,
+ },
+ -- PE part
+ {
+ string = [[PE\x{00}\x{00}]],
+ position = {'>=', 0x3c + 4},
+ weight = 40,
+ }
+ }
+ },
+ elf = {
+ matches = {
+ {
+ hex = [[7f454c46]],
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ lnk = {
+ matches = {
+ {
+ hex = [[4C0000000114020000000000C000000000000046]],
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ class = {
+ -- Technically, this also matches MachO files, but I don't care about
+ -- Apple and their mental health problems here: just consider Java files,
+ -- Mach object files and all other cafe babes as bad and block them!
+ matches = {
+ {
+ hex = [[cafebabe]],
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ -- Archives
+ arj = {
+ matches = {
+ {
+ hex = '60EA',
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ ace = {
+ matches = {
+ {
+ string = [[\*\*ACE\*\*]],
+ position = 14,
+ weight = 60,
+ },
+ }
+ },
+ cab = {
+ matches = {
+ {
+ hex = [[4d53434600000000]], -- Can be anywhere for SFX :(
+ position = {'>=', 8},
+ weight = 60,
+ },
+ }
+ },
+ tar = {
+ matches = {
+ {
+ string = [[ustar]],
+ relative_position = 257,
+ weight = 60,
+ },
+ }
+ },
+ bz2 = {
+ matches = {
+ {
+ string = "BZ[h0]",
+ position = 3,
+ weight = 60,
+ },
+ }
+ },
+ lz4 = {
+ matches = {
+ {
+ hex = "184d2204",
+ relative_position = 0,
+ weight = 60,
+ },
+ {
+ hex = "184c2103",
+ relative_position = 0,
+ weight = 60,
+ },
+ {
+ hex = "184c2102",
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ zst = {
+ matches = {
+ {
+ string = [[\x{FD}\x{2F}\x{B5}[\x{22}-\x{40}].]],
+ position = 5, -- includes last .
+ weight = 60,
+ },
+ }
+ },
+ iso = {
+ matches = {
+ {
+ string = [[\x{01}CD001\x{01}]],
+ position = {'>=', 0x8000 + 7}, -- first 32k is unused
+ weight = 60,
+ },
+ }
+ },
+ -- Apple is a 'special' child: this needs to be matched at the data tail...
+ dmg = {
+ matches = {
+ {
+ string = [[koly]],
+ position = -512 + 4,
+ weight = 61,
+ tail = 512,
+ },
+ }
+ },
+ szdd = {
+ matches = {
+ {
+ hex = [[535a4444]],
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ xz = {
+ matches = {
+ {
+ hex = [[FD377A585A00]],
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ -- Images
+ psd = {
+ matches = {
+ {
+ string = [[8BPS]],
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ ico = {
+ matches = {
+ {
+ hex = [[00000100]],
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ pcx = {
+ matches = {
+ {
+ hex = [[0A050108]],
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ pic = {
+ matches = {
+ {
+ hex = [[FF80C9C71A00]],
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ swf = {
+ matches = {
+ {
+ hex = [[5a5753]], -- LZMA
+ relative_position = 0,
+ weight = 60,
+ },
+ {
+ hex = [[435753]], -- Zlib
+ relative_position = 0,
+ weight = 60,
+ },
+ {
+ hex = [[465753]], -- Uncompressed
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ tiff = {
+ matches = {
+ {
+ hex = [[49492a00]], -- LE encoded
+ relative_position = 0,
+ weight = 60,
+ },
+ {
+ hex = [[4d4d]], -- BE tiff
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ -- Other
+ pgp = {
+ matches = {
+ {
+ hex = [[A803504750]],
+ relative_position = 0,
+ weight = 60,
+ },
+ {
+ hex = [[2D424547494E20504750204D4553534147452D]],
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
+ uue = {
+ matches = {
+ {
+ hex = [[626567696e20]],
+ relative_position = 0,
+ weight = 60,
+ },
+ }
+ },
}
return patterns \ No newline at end of file
diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index 746c87400..b3af668c8 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -22,18 +22,123 @@ limitations under the License.
-- This table is indexed by msdos extension for convenience
local types = {
+ -- exe
+ exe = {
+ ct = 'application/x-ms-application',
+ type = 'executable',
+ },
+ elf = {
+ ct = 'application/x-elf-executable',
+ type = 'executable',
+ },
+ lnk = {
+ ct = 'application/x-ms-application',
+ type = 'executable',
+ },
+ class = {
+ ct = 'application/x-java-applet',
+ type = 'executable',
+ },
+ -- text
+ rtf = {
+ ct = "application/rtf",
+ type = 'text',
+ },
pdf = {
ct = 'application/pdf',
type = 'binary',
},
- exe = {
- ct = 'application/x-ms-application',
- type = 'executable',
+ ps = {
+ ct = 'application/postscript',
+ type = 'binary',
+ },
+ chm = {
+ ct = 'application/x-chm',
+ type = 'binary',
+ },
+ djvu = {
+ ct = 'application/x-djvu',
+ type = 'binary',
+ },
+ -- archives
+ arj = {
+ ct = 'application/x-arj',
+ type = 'archive',
+ },
+ cab = {
+ ct = 'application/x-cab',
+ type = 'archive',
+ },
+ ace = {
+ ct = 'application/x-ace',
+ type = 'archive',
+ },
+ tar = {
+ ct = 'application/x-tar',
+ type = 'archive',
+ },
+ bz2 = {
+ ct = 'application/x-bzip',
+ type = 'archive',
+ },
+ xz = {
+ ct = 'application/x-xz',
+ type = 'archive',
+ },
+ lz4 = {
+ ct = 'application/x-lz4',
+ type = 'archive',
+ },
+ zst = {
+ ct = 'application/x-zstandard',
+ type = 'archive',
+ },
+ dmg = {
+ ct = 'application/x-dmg',
+ type = 'archive',
+ },
+ iso = {
+ ct = 'application/x-iso',
+ type = 'archive',
+ },
+ szdd = { -- in fact, their MSDOS extension is like FOO.TX_ or FOO.TX$
+ ct = 'application/x-compressed',
+ type = 'archive',
+ },
+ -- images
+ psd = {
+ ct = 'image/psd',
+ type = 'image',
+ },
+ pcx = {
+ ct = 'image/pcx',
+ type = 'image',
+ },
+ pic = {
+ ct = 'image/pic',
+ type = 'image',
},
tiff = {
ct = 'image/tiff',
type = 'image',
- }
+ },
+ ico = {
+ ct = 'image/ico',
+ type = 'image',
+ },
+ swf = {
+ ct = 'application/x-shockwave-flash',
+ type = 'image',
+ },
+ -- other
+ pgp = {
+ ct = 'application/encrypted',
+ type = 'encrypted'
+ },
+ uue = {
+ ct = 'application/x-uuencoded',
+ type = 'binary',
+ },
}
return types \ No newline at end of file
diff --git a/src/lua/lua_trie.c b/src/lua/lua_trie.c
index b030c735a..82d11c50c 100644
--- a/src/lua/lua_trie.c
+++ b/src/lua/lua_trie.c
@@ -290,7 +290,7 @@ lua_trie_match (lua_State *L)
}
}
else if (lua_type (L, 2) == LUA_TUSERDATA) {
- t = lua_check_text (L, -1);
+ t = lua_check_text (L, 2);
if (t && lua_trie_search_str (L, trie, t->start, t->len, cb)) {
found = TRUE;
diff --git a/test/functional/cases/340_surbl.robot b/test/functional/cases/340_surbl.robot
index 232914932..ee80577cf 100644
--- a/test/functional/cases/340_surbl.robot
+++ b/test/functional/cases/340_surbl.robot
@@ -94,7 +94,8 @@ SURBL example.com encoded url in subject
WHITELIST
${result} = Scan Message With Rspamc ${TESTDIR}/messages/whitelist.eml
- Should Contain ${result.stdout} RSPAMD_URIBL (
+ Should Not Contain ${result.stdout} RSPAMD_URIBL (
+ Should Not Contain ${result.stdout} DBL_SPAM (
*** Keywords ***
Surbl Setup
diff --git a/test/functional/configs/plugins.conf b/test/functional/configs/plugins.conf
index ac68ec5cb..839e14257 100644
--- a/test/functional/configs/plugins.conf
+++ b/test/functional/configs/plugins.conf
@@ -580,6 +580,16 @@ options = {
replies = ["127.0.0.4", "127.0.0.11"];
},
{
+ name = "rspamd-test.com.test.uribl";
+ type = a;
+ replies = ["127.0.0.2"];
+ },
+ {
+ name = "rspamd-test.com.test2.uribl";
+ type = a;
+ replies = ["127.0.1.2"];
+ },
+ {
name = "9.8.8.8.test4.uribl";
type = a;
replies = ["127.0.0.3"];
diff --git a/test/functional/messages/whitelist.eml b/test/functional/messages/whitelist.eml
index 24686a247..aa19512a1 100644
--- a/test/functional/messages/whitelist.eml
+++ b/test/functional/messages/whitelist.eml
@@ -1,5 +1,3 @@
Content-Type: text/plain
-http://rspamd.com
-http://test.rspamd.example.com
http://rspamd-test.com