aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-01-19 09:39:06 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-01-19 09:39:06 +0000
commit 78ce6c313d9b8d1a104ba15d27363b5303cdc6c8 (patch)
tree 1f56667fd185ea894ece20088683433672b7abdf
parent 966dfee2cf4f8113116b279ab510670f42d3b07b (diff)
download rspamd-78ce6c313d9b8d1a104ba15d27363b5303cdc6c8.tar.gz
rspamd-78ce6c313d9b8d1a104ba15d27363b5303cdc6c8.zip
[Minor] Lua_content: Make text/urls extraction optional
-rw-r--r-- lualib/lua_content/pdf.lua 16
1 file changed, 13 insertions, 3 deletions
diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua
index 85f939869..b577677e8 100644
--- a/lualib/lua_content/pdf.lua
+++ b/lualib/lua_content/pdf.lua
@@ -122,6 +122,8 @@ local exports = {}
local config = {
max_extraction_size = 512 * 1024,
max_processing_size = 32 * 1024,
+ text_extraction = false, -- NYI feature
+ url_extraction = true,
enabled = true,
}
@@ -626,7 +628,11 @@ local function process_dict(task, pdf, obj, dict)
if obj.fonts[k] then
local font = obj.fonts[k]
- process_font(task, pdf, font, k)
+
+ if config.text_extraction then
+ process_font(task, pdf, font, k)
+ end
+
lua_util.debugm(N, task, 'found font "%s" for object %s:%s -> %s',
k, obj.major, obj.minor, font)
end
@@ -1047,8 +1053,12 @@ local function process_pdf(input, _, task)
if pdf_output.start_objects and pdf_output.end_objects then
-- Postprocess objects
postprocess_pdf_objects(task, input, pdf_output)
- search_text(task, pdf_output)
- search_urls(task, pdf_output)
+ if config.text_extraction then
+ search_text(task, pdf_output)
+ end
+ if config.url_extraction then
+ search_urls(task, pdf_output)
+ end
else
pdf_output.flags.no_objects = true
end