summary refs log tree commit diff stats
path: root/lualib/lua_magic/heuristics.lua
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-05-11 16:31:30 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-05-11 16:31:30 +0100
commit b56b5fc2248f1c0ff6d57033935df97e7441f30e (patch)
tree c30986b7907d24c6e232ce8367ef84199b699b61 /lualib/lua_magic/heuristics.lua
parent d4f4efdef40b07b452ef12b33cf192be51e0ef4f (diff)
download rspamd-b56b5fc2248f1c0ff6d57033935df97e7441f30e.tar.gz
rspamd-b56b5fc2248f1c0ff6d57033935df97e7441f30e.zip
[Minor] Improve pdf magic detection
Diffstat (limited to 'lualib/lua_magic/heuristics.lua')
-rw-r--r-- lualib/lua_magic/heuristics.lua 23
1 files changed, 19 insertions, 4 deletions
diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index c60824bd8..678ca1b6d 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -147,7 +147,7 @@ end
-- Call immediately on require
compile_tries()
-local function detect_ole_format(input, log_obj)
+local function detect_ole_format(input, log_obj, _, part)
local inplen = #input
if inplen < 0x31 + 4 then
lua_util.debugm(N, log_obj, "short length: %s", inplen)
@@ -245,7 +245,7 @@ local function process_top_detected(res)
return nil
end
-local function detect_archive_flaw(part, arch, log_obj)
+local function detect_archive_flaw(part, arch, log_obj, _)
local arch_type = arch:get_type()
local res = {
docx = 0,
@@ -312,7 +312,7 @@ local function detect_archive_flaw(part, arch, log_obj)
return arch_type:lower(),40
end
-exports.mime_part_heuristic = function(part, log_obj)
+exports.mime_part_heuristic = function(part, log_obj, _)
if part:is_archive() then
local arch = part:get_archive()
return detect_archive_flaw(part, arch, log_obj)
@@ -321,7 +321,7 @@ exports.mime_part_heuristic = function(part, log_obj)
return nil
end
-exports.text_part_heuristic = function(part, log_obj)
+exports.text_part_heuristic = function(part, log_obj, _)
-- We get some span of data and check it
local function is_span_text(span)
local function rough_utf8_check(bytes, idx, remain)
@@ -436,4 +436,19 @@ exports.text_part_heuristic = function(part, log_obj)
end
end
+--- Weighs a PDF signature hit reported by the magic pattern matcher.
+-- The result is a base weight plus two independent bonuses: one when the
+-- pattern was found near the beginning of the data and one when the
+-- announced file name ends in `.pdf` (case-insensitive).
+-- @param input scanned data; not inspected by this heuristic
+-- @param log_obj logger object; not used by this heuristic
+-- @param pos position at which the pattern was found
+-- @param part object the data belongs to; expected to provide
+--   `get_filename()`. When it is absent the extension bonus is skipped
+--   instead of raising an error.
+-- @return the string 'pdf' and the computed weight (10, 40 or 70)
+exports.pdf_format_heuristic = function(input, log_obj, pos, part)
+  local weight = 10
+  -- The file name only feeds an optional bonus, so a missing part (or a part
+  -- without a file name) must not abort detection: use an empty name instead
+  local fname = part and part:get_filename() or ''
+  local ext = string.match(fname, '%.([^.]+)$')
+  -- If we found a pattern at the beginning
+  if pos <= 10 then
+    weight = weight + 30
+  end
+  -- If the announced extension is `pdf`
+  if ext and ext:lower() == 'pdf' then
+    weight = weight + 30
+  end
+
+  return 'pdf', weight
+end
+
return exports \ No newline at end of file