diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-11-25 11:55:25 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-11-25 11:55:25 +0000 |
commit | 84213d0380223c7e1ec4a8b2df84d93e6daa18bf (patch) | |
tree | c8d2d1a087906a82a2e8270215c3503990c2a530 | |
parent | 02d0a8b6a9682dadbe0146ac6e875f53cd55c39c (diff) | |
download | rspamd-84213d0380223c7e1ec4a8b2df84d93e6daa18bf.tar.gz rspamd-84213d0380223c7e1ec4a8b2df84d93e6daa18bf.zip |
[Project] Add routine to strip attachments
-rw-r--r-- | lualib/lua_mime.lua | 160 |
1 files changed, 160 insertions, 0 deletions
--[[[
-- @function lua_mime.remove_attachments(task, settings)
-- Removes all attachments from a message, keeping only text parts and,
-- optionally, inline images. Kept parts are emitted as a flat structure: either
-- a single text part or one `multipart/mixed` container; the original nesting
-- of multipart containers is not reproduced.
-- @param {task} task Rspamd task object
-- @param {table} settings Optional table with the following fields:
-- * keep_images: boolean, whether to keep inline images (default: false)
-- * min_text_size: number, minimum text part size to keep (default: 0)
-- * max_text_size: number, maximum text part size to keep (default: inf)
-- @return {table|boolean} `false` if there is nothing to remove, otherwise the
-- modified message state similar to other modification functions:
-- * out: array of `{ chunk, flag }` pairs with the new content
--   (NOTE(review): the flag presumably means "append a newline after the
--   chunk", as in the other modification functions - confirm with the consumer)
-- * need_rewrite_ct: boolean field that means if we must rewrite content type
--   (always true when a table is returned)
-- * new_ct: new content type (type => string, subtype => string)
-- * newline_s: newline sequence returned by `newline(task)`
-- `new_cte` is not set by this function: kept parts are copied with their own
-- headers and raw content.
--]]
exports.remove_attachments = function(task, settings)
  local newline_s = newline(task)
  local state = {
    newline_s = newline_s
  }
  local out = {}

  settings = settings or {}
  local keep_images = settings.keep_images or false
  local min_text_size = settings.min_text_size or 0
  local max_text_size = settings.max_text_size or math.huge

  -- Fetch the parts once so that both passes work on the same indexes
  local parts = task:get_parts() or {}

  local kept_indexes = {}
  local kept_count = 0
  local kept_text_count = 0
  local first_text_part
  local has_attachments = false

  -- First pass: identify parts to keep
  for i, part in ipairs(parts) do
    -- Multipart containers are structure, not payload: they are neither kept
    -- nor treated as attachments, otherwise every multipart message would be
    -- rewritten even when it has nothing to strip
    if not part:is_multipart() then
      local keep_part = false
      local is_text = part:is_text()

      if is_text then
        local length = part:get_length()
        keep_part = length >= min_text_size and length <= max_text_size
      elseif keep_images and part:is_image() then
        local cd = part:get_header('Content-Disposition')
        -- Anchor on the disposition type so that a value such as
        -- `attachment; filename="inline.png"` is not mistaken for inline
        if cd and cd:lower():match('^%s*inline') then
          keep_part = true
        end
      end

      if keep_part then
        kept_indexes[i] = true
        kept_count = kept_count + 1
        if is_text then
          kept_text_count = kept_text_count + 1
          first_text_part = first_text_part or part
        end
      else
        has_attachments = true
      end
    end
  end

  -- If no attachments found, return false to indicate that no alterations are required
  if not has_attachments then
    return false
  end

  -- A container is needed for several text parts or as soon as any image is
  -- kept; a lone text part is emitted as a single part message
  local need_multipart = kept_text_count > 1 or kept_count > kept_text_count
  local boundary

  if need_multipart then
    state.new_ct = {
      type = 'multipart',
      subtype = 'mixed'
    }
    -- NOTE(review): fixed boundary string; it may collide with part content,
    -- consider generating a random one
    boundary = '--XXX'

    out[#out + 1] = {
      string.format('Content-Type: multipart/mixed; boundary="%s"%s',
          boundary, newline_s),
      true
    }
    out[#out + 1] = { '', true }
  elseif first_text_part then
    -- Single part message
    state.new_ct = {
      type = 'text',
      subtype = first_text_part:get_text():is_html() and 'html' or 'plain'
    }
  else
    -- Every part has been stripped: still report a valid content type for the
    -- (empty) body, as `need_rewrite_ct` is set below
    state.new_ct = {
      type = 'text',
      subtype = 'plain'
    }
  end

  -- Second pass: reconstruct message from the kept parts only
  for i, part in ipairs(parts) do
    if kept_indexes[i] then
      if need_multipart then
        out[#out + 1] = {
          string.format('--%s', boundary),
          true
        }
      end

      -- Add part headers
      local headers = {}
      for _, h in ipairs(part:get_header_array() or {}) do
        headers[#headers + 1] = string.format('%s: %s', h.name, h.value)
      end

      if #headers > 0 then
        out[#out + 1] = {
          table.concat(headers, newline_s),
          true
        }
      end

      -- Add empty line between headers and content
      out[#out + 1] = { '', true }

      -- Add content
      out[#out + 1] = {
        part:get_raw_content(),
        false
      }
    end
  end

  -- Close the container once, after the last kept part
  if need_multipart then
    out[#out + 1] = {
      string.format('--%s--', boundary),
      true
    }
  end

  state.out = out
  state.need_rewrite_ct = true

  return state
end

return exports