From ae4fee72512a84682ec7d54d17e68e36aac5f2de Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sun, 5 Jan 2020 09:54:18 +0000 Subject: [PATCH] [Project] Lua_content: Ignore fonts --- lualib/lua_content/pdf.lua | 83 ++++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 13 deletions(-) diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua index dd87a4b44..893b55e8e 100644 --- a/lualib/lua_content/pdf.lua +++ b/lualib/lua_content/pdf.lua @@ -201,6 +201,70 @@ local function obj_ref(major, minor) return major * 10.0 + 1.0 / (minor + 1.0) end +-- Return indirect object reference (if needed) +local function maybe_dereference_object(elt, pdf) + if type(elt) == 'table' and elt[1] == '%REF%' then + local ref = obj_ref(elt[2], elt[3]) + + if pdf.ref[ref] then + -- No recursion! + return pdf.ref[ref].dict + end + end + + return elt +end + +-- Enforced dereference +local function dereference_object(elt, pdf) + if type(elt) == 'table' and elt[1] == '%REF%' then + local ref = obj_ref(elt[2], elt[3]) + + if pdf.ref[ref] then + -- Not a dict but the object! + return pdf.ref[ref] + end + end + + return nil +end + +local function process_dict(task, pdf, obj, dict) + if type(dict) == 'table' and dict.Type then + if dict.Type == 'FontDescriptor' then + obj.type = 'font' + obj.ignore = true + + lua_util.debugm(N, task, "obj %s:%s is a font descriptor", + obj.major, obj.minor) + + local stream_ref + if dict.FontFile then + stream_ref = dereference_object(dict.FontFile, pdf) + end + if dict.FontFile2 then + stream_ref = dereference_object(dict.FontFile2, pdf) + end + if dict.FontFile3 then + stream_ref = dereference_object(dict.FontFile3, pdf) + end + + if stream_ref then + if not stream_ref.dict then + stream_ref.dict = {} + end + stream_ref.dict.type = 'font_data' + stream_ref.dict.ignore = true + + lua_util.debugm(N, task, "obj %s:%s is a font data stream", + stream_ref.major, stream_ref.minor) + end + end + end +end + +-- Processes PDF objects: extracts streams, object numbers, process outer grammar, +-- augment object types local function postprocess_pdf_objects(task, input, pdf) local start_pos, end_pos = 1, 1 @@ -323,21 +387,14 @@ local function postprocess_pdf_objects(task, input, pdf) end - pdf.objects = objects -end - --- Return indirect object reference (if needed) -local function maybe_dereference_object(elt, pdf) - if type(elt) == 'table' and elt[1] == '%REF%' then - local ref = obj_ref(elt[2], elt[3]) - - if pdf.ref[ref] then - -- No recursion! - return pdf.ref[ref].dict + for _,obj in ipairs(objects) do + if obj.dict then + -- Types processing + process_dict(task, pdf, obj, obj.dict) end end - return elt + pdf.objects = objects end local function extract_pdf_objects(task, pdf) @@ -367,7 +424,7 @@ local function extract_pdf_objects(task, pdf) end for _,obj in ipairs(pdf.objects or {}) do - if obj.stream and obj.dict and type(obj.dict) == 'table' then + if obj.stream and obj.dict and type(obj.dict) == 'table' and not obj.dict.ignore then maybe_extract_object(obj) end end -- 2.39.5