diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-11-25 12:23:01 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-11-25 12:23:01 +0000 |
commit | 4e42fd433411b0d91235c2fd71cb90d6c5620341 (patch) | |
tree | 463eece4775809bf40c43c98109cc909b643b87c | |
parent | 84213d0380223c7e1ec4a8b2df84d93e6daa18bf (diff) | |
download | rspamd-vstakhov-strip-attachments.tar.gz rspamd-vstakhov-strip-attachments.zip |
[Project] Various fixes and `rspamadm mime strip` command (branch: vstakhov-strip-attachments)
-rw-r--r-- | lualib/lua_mime.lua | 131 | ||||
-rw-r--r-- | lualib/rspamadm/mime.lua | 77 |
2 files changed, 131 insertions, 77 deletions
diff --git a/lualib/lua_mime.lua b/lualib/lua_mime.lua index 71867fbed..795a803e5 100644 --- a/lualib/lua_mime.lua +++ b/lualib/lua_mime.lua @@ -770,9 +770,6 @@ end -- * max_text_size: number, maximum text part size to keep (default: inf) -- @return {table} modified message state similar to other modification functions: -- * out: new content (body only) --- * need_rewrite_ct: boolean field that means if we must rewrite content type --- * new_ct: new content type (type => string, subtype => string) --- * new_cte: new content-transfer encoding (string) --]] exports.remove_attachments = function(task, settings) local newline_s = newline(task) @@ -797,7 +794,7 @@ exports.remove_attachments = function(task, settings) for i, part in ipairs(task:get_parts()) do local keep_part = false - if part:is_text() then + if part:is_text() and not part:is_attachment() then local length = part:get_length() if length >= min_text_size and length <= max_text_size then keep_part = true @@ -822,100 +819,80 @@ exports.remove_attachments = function(task, settings) return false end - -- Prepare new message structure - local need_multipart = false - local text_parts_count = 0 - for _, part in ipairs(parts_to_keep) do - if part:is_text() then - text_parts_count = text_parts_count + 1 - end - end - need_multipart = text_parts_count > 1 or (keep_images and next(parts_to_keep)) - - -- Set content type - if need_multipart then - state.new_ct = { - type = 'multipart', - subtype = 'mixed' - } - cur_boundary = '--XXX' - boundaries[1] = cur_boundary - - out[#out + 1] = { - string.format('Content-Type: multipart/mixed; boundary="%s"%s', - cur_boundary, newline_s), - true - } - out[#out + 1] = { '', true } - else - -- Single part message - for _, part in ipairs(parts_to_keep) do - if part:is_text() then - state.new_ct = { - type = 'text', - subtype = part:get_text():is_html() and 'html' or 'plain' - } - break - end - end - end - -- Second pass: reconstruct message for i, part in ipairs(task:get_parts()) 
do + local boundary = part:get_boundary() if part:is_multipart() then - -- Skip multipart containers - local boundary = part:get_boundary() + if cur_boundary then + out[#out + 1] = { string.format('--%s', + boundaries[#boundaries]), true } + end + + boundaries[#boundaries + 1] = boundary or '--XXX' + cur_boundary = boundary + + local rh = part:get_raw_headers() + if #rh > 0 then + out[#out + 1] = { rh, true } + end + elseif part:is_message() then if boundary then if cur_boundary and boundary ~= cur_boundary then - out[#out + 1] = { - string.format('--%s--', boundaries[#boundaries]), - true - } + -- Need to close boundary + out[#out + 1] = { string.format('--%s--', + boundaries[#boundaries]), true } table.remove(boundaries) + cur_boundary = nil end - end - elseif parts_indexes_to_keep[i] then - if need_multipart then - out[#out + 1] = { - string.format('--%s', cur_boundary), - true - } + out[#out + 1] = { string.format('--%s', + boundary), true } end - -- Add part headers - local headers = {} - for _, h in ipairs(part:get_header_array()) do - table.insert(headers, string.format('%s: %s', h.name, h.value)) - end + out[#out + 1] = { part:get_raw_headers(), true } + else + if parts_indexes_to_keep[i] then + if boundary then + if cur_boundary and boundary ~= cur_boundary then + -- Need to close previous boundary + out[#out + 1] = { string.format('--%s--', + boundaries[#boundaries]), true } + table.remove(boundaries) + cur_boundary = boundary + end + out[#out + 1] = { string.format('--%s', + boundary), true } + end + -- Add part headers + local headers = part:get_raw_headers() - if #headers > 0 then + if headers then + out[#out + 1] = { + headers, + true + } + end + + -- Add content out[#out + 1] = { - table.concat(headers, newline_s), - true + part:get_raw_content(), + false } end - -- Add empty line between headers and content - out[#out + 1] = { '', true } - - -- Add content - out[#out + 1] = { - part:get_raw_content(), - false - } end end -- Close remaining 
boundaries - if need_multipart then - out[#out + 1] = { - string.format('--%s--', cur_boundary), - true - } + local b = table.remove(boundaries) + while b do + out[#out + 1] = { string.format('--%s--', b), true } + if #boundaries > 0 then + out[#out + 1] = { '', true } + end + b = table.remove(boundaries) end state.out = out - state.need_rewrite_ct = true return state end diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua index 6a589d66a..7750c5a78 100644 --- a/lualib/rspamadm/mime.lua +++ b/lualib/rspamadm/mime.lua @@ -160,6 +160,25 @@ modify:option "-H --html-footer" :description "Adds footer to text/html parts from a specific file" :argname "<file>" +local strip = parser:command "strip" + :description "Strip attachments from a message" +strip:argument "file" + :description "File to process" + :argname "<file>" + :args "+" +strip:flag "-i --keep-images" + :description "Keep images" +strip:option "--min-text-size" + :description "Minimal text size to keep" + :argname "<size>" + :convert(tonumber) + :default(0) +strip:option "--max-text-size" + :description "Max text size to keep" + :argname "<size>" + :convert(tonumber) + :default(math.huge) + local sign = parser:command "sign" :description "Performs DKIM signing" sign:argument "file" @@ -893,6 +912,62 @@ local function sign_handler(opts) end end +local function strip_handler(opts) + load_config(opts) + rspamd_url.init(rspamd_config:get_tld_path()) + + for _, fname in ipairs(opts.file) do + local task = load_task(opts, fname) + local newline_s = newline(task) + + local rewrite = lua_mime.remove_attachments(task, { + keep_images = opts.keep_images, + min_text_size = opts.min_text_size, + max_text_size = opts.max_text_size + }) or {} + local out = {} -- Start with headers + + local function process_headers_cb(name, hdr) + out[#out + 1] = hdr.raw:gsub('\r?\n?$', '') + end + + task:headers_foreach(process_headers_cb, { full = true }) + -- End of headers + out[#out + 1] = '' + + if rewrite.out then + for 
_, o in ipairs(rewrite.out) do + out[#out + 1] = o + end + else + out[#out + 1] = { task:get_rawbody(), false } + end + + for _, o in ipairs(out) do + if type(o) == 'string' then + io.write(o) + io.write(newline_s) + elseif type(o) == 'table' then + io.flush() + if type(o[1]) == 'string' then + io.write(o[1]) + else + o[1]:save_in_file(1) + end + + if o[2] then + io.write(newline_s) + end + else + o:save_in_file(1) + io.write(newline_s) + end + end + + task:destroy() -- No automatic dtor + end +end + -- Strips directories and .extensions (if present) from a filepath local function filename_only(filepath) local filename = filepath:match(".*%/([^%.]+)") @@ -995,6 +1070,8 @@ local function handler(args) urls_handler(opts) elseif command == 'modify' then modify_handler(opts) + elseif command == 'strip' then + strip_handler(opts) elseif command == 'sign' then sign_handler(opts) elseif command == 'dump' then |