diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-05-22 14:02:26 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-05-22 14:02:26 +0100 |
commit | 3b3de857f12e0b13b060c8fffccd4923abca3631 (patch) | |
tree | 2681033bccd99ad70b72bfd72fb5c41591e9b72e | |
parent | 2fa03199e4bcf3d323d5c94ec7a16bb2890e0354 (diff) | |
download | rspamd-3b3de857f12e0b13b060c8fffccd4923abca3631.tar.gz rspamd-3b3de857f12e0b13b060c8fffccd4923abca3631.zip |
[Feature] PDF: Add timeouts for expensive operations
-rw-r--r-- | conf/scores.d/content_group.conf | 5 | ||||
-rw-r--r-- | lualib/lua_content/pdf.lua | 54 | ||||
-rw-r--r-- | rules/content.lua | 10 | ||||
-rw-r--r-- | src/lua/lua_task.c | 3 |
4 files changed, 67 insertions, 5 deletions
diff --git a/conf/scores.d/content_group.conf b/conf/scores.d/content_group.conf index 6a011b938..56255bea0 100644 --- a/conf/scores.d/content_group.conf +++ b/conf/scores.d/content_group.conf @@ -43,5 +43,10 @@ symbols = { description = "There is a PDF file with too many objects"; one_shot = true; } + "PDF_TIMEOUT" { + weight = 0; + description = "There is a PDF file that caused timeout in processing"; + one_shot = true; + } } diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua index 2e7e74063..120ca5111 100644 --- a/lualib/lua_content/pdf.lua +++ b/lualib/lua_content/pdf.lua @@ -119,6 +119,7 @@ local config = { max_pdf_objects = 10000, -- Maximum number of objects to be considered max_pdf_trailer = 10 * 1024 * 1024, -- Maximum trailer size (to avoid abuse) max_pdf_trailer_lines = 100, -- Maximum number of lines in pdf trailer + pdf_process_timeout = 1.0, -- Timeout in seconds for processing } -- Used to process patterns found in PDF @@ -809,7 +810,19 @@ end -- PDF 1.5 ObjStmt local function extract_pdf_compound_objects(task, pdf) - for _,obj in ipairs(pdf.objects or {}) do + for i,obj in ipairs(pdf.objects or {}) do + if i > 0 and i % 100 == 0 then + local now = rspamd_util.get_ticks() + + if now >= pdf.end_timestamp then + pdf.timeout_processing = now - pdf.start_timestamp + + lua_util.debugm(N, task, 'pdf: timeout processing compound objects after spending %s seconds, ' .. + '%s elements processed', + pdf.timeout_processing, i) + break + end + end if obj.stream and obj.dict and type(obj.dict) == 'table' then local t = obj.dict.Type if t and t == 'ObjStm' then @@ -965,17 +978,47 @@ local function postprocess_pdf_objects(task, input, pdf) -- Now we have objects and we need to attach streams that are in bounds attach_pdf_streams(task, input, pdf) -- Parse grammar for outer objects - for _,obj in ipairs(pdf.objects) do + for i,obj in ipairs(pdf.objects) do + if i > 0 and i % 100 == 0 then + local now = rspamd_util.get_ticks() + + if now >= pdf.end_timestamp then + pdf.timeout_processing = now - pdf.start_timestamp + + lua_util.debugm(N, task, 'pdf: timeout processing grammars after spending %s seconds, ' .. + '%s elements processed', + pdf.timeout_processing, i) + break + end + end if obj.ref then parse_object_grammar(obj, task, pdf) end end - extract_pdf_compound_objects(task, pdf) + + if not pdf.timeout_processing then + extract_pdf_compound_objects(task, pdf) + else + -- ENOTIME + return + end -- Now we might probably have all objects being processed - for _,obj in ipairs(pdf.objects) do + for i,obj in ipairs(pdf.objects) do if obj.dict then -- Types processing + if i > 0 and i % 100 == 0 then + local now = rspamd_util.get_ticks() + + if now >= pdf.end_timestamp then + pdf.timeout_processing = now - pdf.start_timestamp + + lua_util.debugm(N, task, 'pdf: timeout processing dicts after spending %s seconds, ' .. + '%s elements processed', + pdf.timeout_processing, i) + break + end + end process_dict(task, pdf, obj, obj.dict) end end @@ -1112,9 +1155,12 @@ local function process_pdf(input, mpart, task) local matches = pdf_trie:match(input) if matches then + local start_ts = rspamd_util.get_ticks() local pdf_output = { tag = 'pdf', extract_text = extract_text_data, + start_timestamp = start_ts, + end_timestamp = start_ts + config.pdf_process_timeout, } local grouped_processors = {} for npat,matched_positions in pairs(matches) do diff --git a/rules/content.lua b/rules/content.lua index 5bdc46c25..d95eeec63 100644 --- a/rules/content.lua +++ b/rules/content.lua @@ -46,6 +46,10 @@ local function process_pdf_specific(task, part, specific) task:insert_result('PDF_MANY_OBJECTS', 1.0, string.format('%s:%d', part:get_filename() or 'unknown', specific.many_objects)) end + if specific.timeout_processing then + task:insert_result('PDF_TIMEOUT', 1.0, string.format('%s:%.3f', + part:get_filename() or 'unknown', specific.timeout_processing)) + end end local tags_processors = { @@ -104,3 +108,9 @@ rspamd_config:register_symbol{ parent = id, groups = {"content", "pdf"}, } +rspamd_config:register_symbol{ + type = 'virtual', + name = 'PDF_TIMEOUT', + parent = id, + groups = {"content", "pdf"}, +}
\ No newline at end of file diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 98c0b06ed..d7808ee5f 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -759,8 +759,9 @@ LUA_FUNCTION_DEF (task, get_date); */ LUA_FUNCTION_DEF (task, get_message_id); /*** - * @method task:get_timeval() + * @method task:get_timeval([raw]) * Returns the timestamp for a task start processing time. + * @param {boolean} raw if true then two float numbers are returned: task start timestamp and timeout event timestamp * @return {table} table with fields as described in `struct timeval` in C */ LUA_FUNCTION_DEF (task, get_timeval); |