diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-01-19 09:39:06 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-01-19 09:39:06 +0000 |
commit | 78ce6c313d9b8d1a104ba15d27363b5303cdc6c8 (patch) | |
tree | 1f56667fd185ea894ece20088683433672b7abdf | |
parent | 966dfee2cf4f8113116b279ab510670f42d3b07b (diff) | |
download | rspamd-78ce6c313d9b8d1a104ba15d27363b5303cdc6c8.tar.gz rspamd-78ce6c313d9b8d1a104ba15d27363b5303cdc6c8.zip |
[Minor] Lua_content: Make text/urls extraction optional
-rw-r--r-- | lualib/lua_content/pdf.lua | 16 |
1 files changed, 13 insertions, 3 deletions
```diff
diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua
index 85f939869..b577677e8 100644
--- a/lualib/lua_content/pdf.lua
+++ b/lualib/lua_content/pdf.lua
@@ -122,6 +122,8 @@ local exports = {}
 local config = {
   max_extraction_size = 512 * 1024,
   max_processing_size = 32 * 1024,
+  text_extraction = false, -- NYI feature
+  url_extraction = true,
   enabled = true,
 }
 
@@ -626,7 +628,11 @@ local function process_dict(task, pdf, obj, dict)
 
           if obj.fonts[k] then
             local font = obj.fonts[k]
-            process_font(task, pdf, font, k)
+
+            if config.text_extraction then
+              process_font(task, pdf, font, k)
+            end
+
             lua_util.debugm(N, task, 'found font "%s" for object %s:%s -> %s',
                 k, obj.major, obj.minor, font)
           end
@@ -1047,8 +1053,12 @@ local function process_pdf(input, _, task)
     if pdf_output.start_objects and pdf_output.end_objects then
       -- Postprocess objects
       postprocess_pdf_objects(task, input, pdf_output)
-      search_text(task, pdf_output)
-      search_urls(task, pdf_output)
+      if config.text_extraction then
+        search_text(task, pdf_output)
+      end
+      if config.url_extraction then
+        search_urls(task, pdf_output)
+      end
     else
       pdf_output.flags.no_objects = true
     end
```