aboutsummaryrefslogtreecommitdiffstats
path: root/lualib
diff options
context:
space:
mode:
Diffstat (limited to 'lualib')
-rw-r--r--lualib/lua_content/pdf.lua48
1 files changed, 45 insertions, 3 deletions
diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua
index 8b77213af..8a1e16ad0 100644
--- a/lualib/lua_content/pdf.lua
+++ b/lualib/lua_content/pdf.lua
@@ -1294,9 +1294,51 @@ processors.trailer = function(input, task, positions, output)
end
end
-processors.suspicious = function(_, task, _, output)
- lua_util.debugm(N, task, "pdf: found a suspicious pattern")
- output.suspicious = true
+processors.suspicious = function(input, task, positions, output)
+ local suspicious_factor = 0.0
+ local nexec = 0
+ local nencoded = 0
+ local close_encoded = 0
+ local last_encoded
+ for _,match in ipairs(positions) do
+ if match[2] == 1 then
+ -- netsh
+ suspicious_factor = suspicious_factor + 0.5
+ elseif match[2] == 2 then
+ nexec = nexec + 1
+ else
+ nencoded = nencoded + 1
+
+ if last_encoded then
+ if match[1] - last_encoded < 8 then
+ -- likely consecutive encoded chars, increase factor
+ close_encoded = close_encoded + 1
+ end
+ end
+ last_encoded = match[1]
+ end
+ end
+
+ if nencoded > 10 then
+ suspicious_factor = suspicious_factor + nencoded / 10
+ end
+ if nexec > 1 then
+ suspicious_factor = suspicious_factor + nexec / 2.0
+ end
+ if close_encoded > 4 and nencoded - close_encoded < 5 then
+ -- Too many close encoded comparing to the total number of encoded characters
+ suspicious_factor = suspicious_factor + 0.5
+ end
+
+ lua_util.debugm(N, task, 'pdf: found a suspicious patterns: %s exec, %s encoded (%s close), ' ..
+ '%s final factor',
+ nexec, nencoded, close_encoded, suspicious_factor)
+
+ if suspicious_factor > 1.0 then
+ suspicious_factor = 1.0
+ end
+
+ output.suspicious = suspicious_factor
end
local function generic_table_inserter(positions, output, output_key)