js_fuzzy = true, -- Generate fuzzy hashes from PDF javascripts
min_js_fuzzy = 32, -- Minimum size of js to be considered as a fuzzy
openaction_fuzzy_only = false, -- Generate fuzzy from all scripts
+ max_pdf_objects = 10000, -- Maximum number of objects to be considered
+ max_pdf_trailer = 10 * 1024 * 1024, -- Maximum trailer size (to avoid abuse)
+ max_pdf_trailer_lines = 100, -- Maximum number of lines in pdf trailer
}
-- Used to process patterns found in PDF
-- set of objects
local function extract_outer_objects(task, input, pdf)
local start_pos, end_pos = 1, 1
+ local max_start_pos, max_end_pos
local obj_count = 0
+ max_start_pos = math.min(config.max_pdf_objects, #pdf.start_objects)
+ max_end_pos = math.min(config.max_pdf_objects, #pdf.end_objects)
lua_util.debugm(N, task, "pdf: extract objects from %s start positions and %s end positions",
- #pdf.start_objects, #pdf.end_objects)
+ max_start_pos, max_end_pos)
- while start_pos <= #pdf.start_objects and end_pos <= #pdf.end_objects do
+ while start_pos <= max_start_pos and end_pos <= max_end_pos do
local first = pdf.start_objects[start_pos]
local last = pdf.end_objects[end_pos]
local function attach_pdf_streams(task, input, pdf)
if pdf.start_streams and pdf.end_streams then
local start_pos, end_pos = 1, 1
+ local max_start_pos, max_end_pos
+ local obj_count = 0
+
+ max_start_pos = math.min(config.max_pdf_objects, #pdf.start_streams)
+ max_end_pos = math.min(config.max_pdf_objects, #pdf.end_streams)
for _,obj in ipairs(pdf.objects) do
- while start_pos <= #pdf.start_streams and end_pos <= #pdf.end_streams do
+ while start_pos <= max_start_pos and end_pos <= max_end_pos do
local first = pdf.start_streams[start_pos]
local last = pdf.end_streams[end_pos]
last = last - 10 -- Exclude endstream\n pattern
pdf_output.flags = {}
if pdf_output.start_objects and pdf_output.end_objects then
+ if #pdf_output.start_objects > config.max_pdf_objects then
+ pdf_output.many_objects = #pdf_output.start_objects
+ -- Trim
+ end
+
-- Postprocess objects
postprocess_pdf_objects(task, input, pdf_output)
if config.text_extraction then
lua_util.debugm(N, task, 'pdf: process trailer at position %s (%s total length)',
last_pos, #input)
+ if last_pos[1] > config.max_pdf_trailer then
+ output.long_trailer = #input - last_pos[1]
+ return
+ end
+
local last_span = input:span(last_pos[1])
local lines_checked = 0
for line in last_span:lines(true) do
end
lines_checked = lines_checked + 1
- if lines_checked > 100 then
+ if lines_checked > config.max_pdf_trailer_lines then
lua_util.debugm(N, task, "pdf: trailer has too many lines, stop checking")
+ output.long_trailer = #input - last_pos[1]
break
end
end