diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-11-26 20:05:39 +0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-26 20:05:39 +0600 |
commit | 6c0223b32b8fcb6621fa64197214abb400a09f52 (patch) | |
tree | 463eece4775809bf40c43c98109cc909b643b87c | |
parent | 02d0a8b6a9682dadbe0146ac6e875f53cd55c39c (diff) | |
parent | 4e42fd433411b0d91235c2fd71cb90d6c5620341 (diff) | |
download | rspamd-6c0223b32b8fcb6621fa64197214abb400a09f52.tar.gz rspamd-6c0223b32b8fcb6621fa64197214abb400a09f52.zip |
Merge pull request #5233 from rspamd/vstakhov-strip-attachments
Allow to strip attachments
-rw-r--r-- | lualib/lua_mime.lua | 137 | ||||
-rw-r--r-- | lualib/rspamadm/mime.lua | 77 |
2 files changed, 214 insertions, 0 deletions
--[[[
-- @function lua_mime.remove_attachments(task, settings)
-- Removes all attachments from a message, keeping only text parts
-- (and, when requested, images carrying an inline Content-Disposition)
-- @param {task} task Rspamd task object
-- @param {table} settings Table with the following fields:
-- * keep_images: boolean, whether to keep inline images (default: false)
-- * min_text_size: number, minimum text part size to keep (default: 0)
-- * max_text_size: number, maximum text part size to keep (default: inf)
-- @return {table|boolean} `false` when every part is kept (no alteration is
-- required), otherwise a modified message state similar to other modification
-- functions:
-- * out: new content (body only), an array of `{ chunk, add_newline }` pairs
-- * newline_s: newline sequence detected for the task
--]]
exports.remove_attachments = function(task, settings)
  local newline_s = newline(task)
  local state = {
    newline_s = newline_s
  }
  local out = {}

  settings = settings or {}
  local keep_images = settings.keep_images or false
  local min_text_size = settings.min_text_size or 0
  local max_text_size = settings.max_text_size or math.huge

  -- Fetch the parts once and reuse them in both passes; a message without
  -- parts is treated as having nothing to strip
  local parts = task:get_parts() or {}

  -- Stack of currently open multipart boundaries
  local boundaries = {}
  local cur_boundary
  local has_attachments = false
  local parts_indexes_to_keep = {}

  -- Appends raw headers of a part to the output. A missing headers object is
  -- never pushed: the consumer dereferences the first element of every chunk.
  -- With `skip_empty` set, zero-length headers are dropped as well.
  local function append_raw_headers(part, skip_empty)
    local rh = part:get_raw_headers()
    if not rh then
      return
    end
    if skip_empty and #rh == 0 then
      return
    end
    out[#out + 1] = { rh, true }
  end

  -- Emits the closing delimiter for the innermost open boundary and pops it
  local function close_innermost_boundary()
    out[#out + 1] = { string.format('--%s--',
        boundaries[#boundaries]), true }
    table.remove(boundaries)
  end

  -- First pass: identify parts to keep
  for i, part in ipairs(parts) do
    local keep_part = false

    if part:is_text() and not part:is_attachment() then
      local length = part:get_length()
      if length >= min_text_size and length <= max_text_size then
        keep_part = true
      end
    elseif keep_images and part:is_image() then
      local cd = part:get_header('Content-Disposition')
      if cd and cd:lower():match('inline') then
        keep_part = true
      end
    end

    if keep_part then
      parts_indexes_to_keep[i] = true
    else
      -- NOTE(review): container parts (multipart/message) also land here, so
      -- any structured message is reported as having something to strip
      has_attachments = true
    end
  end

  -- If no attachments found, return false to indicate that no alterations
  -- are required
  if not has_attachments then
    return false
  end

  -- Second pass: reconstruct message
  for i, part in ipairs(parts) do
    local boundary = part:get_boundary()
    if part:is_multipart() then
      if cur_boundary then
        -- Nested multipart: open a new section within the parent boundary
        out[#out + 1] = { string.format('--%s',
            boundaries[#boundaries]), true }
      end

      boundaries[#boundaries + 1] = boundary or '--XXX'
      cur_boundary = boundary

      append_raw_headers(part, true)
    elseif part:is_message() then
      if boundary then
        if cur_boundary and boundary ~= cur_boundary then
          -- Need to close boundary
          close_innermost_boundary()
          cur_boundary = nil
        end
        out[#out + 1] = { string.format('--%s',
            boundary), true }
      end

      append_raw_headers(part, false)
    elseif parts_indexes_to_keep[i] then
      if boundary then
        if cur_boundary and boundary ~= cur_boundary then
          -- Need to close previous boundary
          close_innermost_boundary()
          cur_boundary = boundary
        end
        out[#out + 1] = { string.format('--%s',
            boundary), true }
      end

      -- Add part headers
      append_raw_headers(part, false)

      -- Add content; the `false` flag asks the consumer not to append a
      -- newline after the raw content
      out[#out + 1] = {
        part:get_raw_content(),
        false
      }
    end
  end

  -- Close remaining boundaries, innermost first, separating consecutive
  -- closing delimiters with an empty line
  for idx = #boundaries, 1, -1 do
    out[#out + 1] = { string.format('--%s--', boundaries[idx]), true }
    if idx > 1 then
      out[#out + 1] = { '', true }
    end
  end

  state.out = out

  return state
end
-- `strip` sub-command: removes attachments from one or more messages
local strip = parser:command("strip")
    :description("Strip attachments from a message")
strip:argument("file")
    :description("File to process")
    :argname("<file>")
    :args("+")
strip:flag("-i --keep-images")
    :description("Keep images")
strip:option("--min-text-size")
    :description("Minimal text size to keep")
    :argname("<size>")
    :convert(tonumber)
    :default(0)
strip:option("--max-text-size")
    :description("Max text size to keep")
    :argname("<size>")
    :convert(tonumber)
    :default(math.huge)

-- Handler of the `strip` sub-command.
-- For every file in `opts.file` the message is loaded, passed through
-- `lua_mime.remove_attachments` and written to the standard output: original
-- headers first, then an empty separator line, then either the rewritten body
-- or (when nothing was rewritten) the raw body of the task.
-- @param opts parsed command line options (`file`, `keep_images`,
-- `min_text_size`, `max_text_size`)
local function strip_handler(opts)
  load_config(opts)
  rspamd_url.init(rspamd_config:get_tld_path())

  for _, fname in ipairs(opts.file) do
    local task = load_task(opts, fname)
    local newline_s = newline(task)

    local strip_settings = {
      keep_images = opts.keep_images,
      min_text_size = opts.min_text_size,
      max_text_size = opts.max_text_size
    }
    -- A false result means that nothing has been changed
    local rewrite = lua_mime.remove_attachments(task, strip_settings) or {}

    -- Output chunks: plain strings or `{ data, add_newline }` pairs
    local out = {}

    -- Headers go first, with the trailing line ending removed
    task:headers_foreach(function(name, hdr)
      local raw_line = hdr.raw:gsub('\r?\n?$', '')
      out[#out + 1] = raw_line
    end, { full = true })

    -- Empty line terminates the headers section
    out[#out + 1] = ''

    local body_chunks = rewrite.out
    if body_chunks then
      for _, chunk in ipairs(body_chunks) do
        out[#out + 1] = chunk
      end
    else
      out[#out + 1] = { task:get_rawbody(), false }
    end

    for idx = 1, #out do
      local chunk = out[idx]
      local chunk_type = type(chunk)

      if chunk_type == 'string' then
        io.write(chunk, newline_s)
      elseif chunk_type == 'table' then
        -- Flush buffered output before a possible `save_in_file(1)` call;
        -- presumably that call writes to descriptor 1 directly - TODO confirm
        io.flush()

        local data, add_newline = chunk[1], chunk[2]
        if type(data) == 'string' then
          io.write(data)
        else
          data:save_in_file(1)
        end

        if add_newline then
          io.write(newline_s)
        end
      else
        -- Neither a string nor a pair: write the object itself
        chunk:save_in_file(1)
        io.write(newline_s)
      end
    end

    task:destroy() -- No automatic dtor
  end
end
filename_only(filepath) local filename = filepath:match(".*%/([^%.]+)") @@ -995,6 +1070,8 @@ local function handler(args) urls_handler(opts) elseif command == 'modify' then modify_handler(opts) + elseif command == 'strip' then + strip_handler(opts) elseif command == 'sign' then sign_handler(opts) elseif command == 'dump' then |