diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-01-16 12:23:44 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-01-16 12:23:44 +0000 |
commit | e7bc102cd470e7cb36335a65c33eaf76707295ea (patch) | |
tree | 63c7952d89bad4b996d918fee3bdd74d48b7b459 /lualib/lua_content/pdf.lua | |
parent | f4afd62f24839b0c30056891f881f153305c2864 (diff) | |
download | rspamd-e7bc102cd470e7cb36335a65c33eaf76707295ea.tar.gz rspamd-e7bc102cd470e7cb36335a65c33eaf76707295ea.zip |
[Project] Lua_content: Add preliminary fonts handling
Diffstat (limited to 'lualib/lua_content/pdf.lua')
-rw-r--r-- | lualib/lua_content/pdf.lua | 127 |
1 files changed, 75 insertions, 52 deletions
diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua index da434b501..460938f8a 100644 --- a/lualib/lua_content/pdf.lua +++ b/lualib/lua_content/pdf.lua @@ -359,6 +359,58 @@ local function dereference_object(elt, pdf) return nil end +-- Apply PDF stream filter +local function apply_pdf_filter(input, filt) + if filt == 'FlateDecode' then + return rspamd_util.inflate(input, config.max_extraction_size) + end + + return nil +end + +-- Conditionally apply a pipeline of stream filters and return uncompressed data +local function maybe_apply_filter(dict, data) + local uncompressed = data + + if dict.Filter then + local filt = dict.Filter + if type(filt) == 'string' then + filt = {filt} + end + + for _,f in ipairs(filt) do + uncompressed = apply_pdf_filter(uncompressed, f) + + if not uncompressed then break end + end + end + + return uncompressed +end + +-- Conditionally extract stream data from object and attach it as obj.uncompressed +local function maybe_extract_object_stream(obj, pdf, task) + local dict = obj.dict + if dict.Filter and dict.Length then + local len = math.min(obj.stream.len, + tonumber(maybe_dereference_object(dict.Length, pdf, task)) or 0) + local real_stream = obj.stream.data:span(1, len) + + local uncompressed = maybe_apply_filter(dict, real_stream) + + if uncompressed then + obj.uncompressed = uncompressed + lua_util.debugm(N, task, 'extracted object %s:%s: (%s -> %s)', + obj.major, obj.minor, len, uncompressed:len()) + return obj.uncompressed + else + lua_util.debugm(N, task, 'cannot extract object %s:%s; len = %s; filter = %s', + obj.major, obj.minor, len, dict.Filter) + end + end +end + + local function parse_object_grammar(obj, task, pdf) -- Parse grammar local obj_dict_span @@ -385,8 +437,8 @@ local function parse_object_grammar(obj, task, pdf) lua_util.debugm(N, task, 'direct object %s:%s is parsed to: %s', obj.major, obj.minor, obj_or_err) else - lua_util.debugm(N, task, 'direct object %s:%s cannot be parsed: %s', - obj.major, obj.minor, obj_dict_span) + obj.dict = {} + obj.uncompressed = obj_or_err end end else @@ -399,6 +451,24 @@ local function parse_object_grammar(obj, task, pdf) end end +-- Extracts font data and process /ToUnicode mappings +local function process_font(task, pdf, font, fname) + local dict = font + if font.dict then + dict = font.dict + end + + if type(dict) == 'table' and dict.ToUnicode then + local cmap = maybe_dereference_object(dict.ToUnicode, pdf, task) + + if cmap and cmap.dict then + maybe_extract_object_stream(cmap, pdf, task) + lua_util.debugm(N, task, 'found cmap for font %s: %s', + fname, cmap.uncompressed) + end + end +end + local function process_dict(task, pdf, obj, dict) if not obj.type and type(dict) == 'table' then if dict.Type and type(dict.Type) == 'string' then @@ -481,8 +551,9 @@ local function process_dict(task, pdf, obj, dict) if obj.fonts[k] then local font = obj.fonts[k] - lua_util.debugm(N, task, 'found font for object %s:%s -> %s', - obj.major, obj.minor, font) + process_font(task, pdf, font, k) + lua_util.debugm(N, task, 'found font "%s" for object %s:%s -> %s', + k, obj.major, obj.minor, font) end end end @@ -520,54 +591,6 @@ local function process_dict(task, pdf, obj, dict) end end -local function apply_pdf_filter(input, filt) - if filt == 'FlateDecode' then - return rspamd_util.inflate(input, config.max_extraction_size) - end - - return nil -end - -local function maybe_apply_filter(dict, data) - local uncompressed = data - - if dict.Filter then - local filt = dict.Filter - if type(filt) == 'string' then - filt = {filt} - end - - for _,f in ipairs(filt) do - uncompressed = apply_pdf_filter(uncompressed, f) - - if not uncompressed then break end - end - end - - return uncompressed -end - -local function maybe_extract_object_stream(obj, pdf, task) - local dict = obj.dict - if dict.Filter and dict.Length then - local len = math.min(obj.stream.len, - tonumber(maybe_dereference_object(dict.Length, pdf, task)) or 0) - local real_stream = obj.stream.data:span(1, len) - - local uncompressed = maybe_apply_filter(dict, real_stream) - - if uncompressed then - obj.uncompressed = uncompressed - lua_util.debugm(N, task, 'extracted object %s:%s: (%s -> %s)', - obj.major, obj.minor, len, uncompressed:len()) - return obj.uncompressed - else - lua_util.debugm(N, task, 'cannot extract object %s:%s; len = %s; filter = %s', - obj.major, obj.minor, len, dict.Filter) - end - end -end - -- This function is intended to unpack objects from ObjStm crappy structure local compound_obj_grammar local function compound_obj_grammar_gen() |