aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rspamd.com> 2024-11-26 20:05:39 +0600
committer GitHub <noreply@github.com> 2024-11-26 20:05:39 +0600
commit 6c0223b32b8fcb6621fa64197214abb400a09f52 (patch)
tree 463eece4775809bf40c43c98109cc909b643b87c
parent 02d0a8b6a9682dadbe0146ac6e875f53cd55c39c (diff)
parent 4e42fd433411b0d91235c2fd71cb90d6c5620341 (diff)
download rspamd-6c0223b32b8fcb6621fa64197214abb400a09f52.tar.gz
rspamd-6c0223b32b8fcb6621fa64197214abb400a09f52.zip
Merge pull request #5233 from rspamd/vstakhov-strip-attachments
Allow to strip attachments
-rw-r--r-- lualib/lua_mime.lua 137
-rw-r--r-- lualib/rspamadm/mime.lua 77
2 files changed, 214 insertions, 0 deletions
diff --git a/lualib/lua_mime.lua b/lualib/lua_mime.lua
index ea6bf5125..795a803e5 100644
--- a/lualib/lua_mime.lua
+++ b/lualib/lua_mime.lua
@@ -760,4 +760,141 @@ exports.message_to_ucl_schema = function()
}
end
+--[[[
+-- @function lua_mime.remove_attachments(task, settings)
+-- Removes all attachments from a message, keeping only text parts and,
+-- optionally, inline images. Parts reported as multipart or as a message by
+-- `is_multipart()` / `is_message()` are treated as structure: their raw
+-- headers are always re-emitted and they are never counted as attachments.
+-- @param {task} task Rspamd task object
+-- @param {table} settings Optional table with the following fields:
+-- * keep_images: boolean, whether to keep inline images (default: false)
+-- * min_text_size: number, minimum text part size to keep (default: 0)
+-- * max_text_size: number, maximum text part size to keep (default: inf)
+-- @return {table|boolean} `false` when no part has to be removed (the caller
+-- should then use the original body as is); otherwise a state table similar
+-- to other modification functions:
+-- * newline_s: newline sequence obtained from `newline(task)`
+-- * out: new content (body only), a list of `{ data, add_newline }` pairs
+--]]
+exports.remove_attachments = function(task, settings)
+  local newline_s = newline(task)
+  local state = {
+    newline_s = newline_s
+  }
+  local out = {}
+
+  settings = settings or {}
+  local keep_images = settings.keep_images or false
+  local min_text_size = settings.min_text_size or 0
+  local max_text_size = settings.max_text_size or math.huge
+
+  -- Process message structure
+  local boundaries = {}
+  local cur_boundary
+  local has_attachments = false
+  local parts_indexes_to_keep = {}
+
+  -- Both passes walk the same list, so fetch it once
+  local parts = task:get_parts()
+
+  -- First pass: identify parts to keep
+  for i, part in ipairs(parts) do
+    -- Containers are rebuilt unconditionally by the second pass, so they must
+    -- not be mistaken for attachments: otherwise every multipart message
+    -- would be reported as altered even when nothing is stripped
+    if not (part:is_multipart() or part:is_message()) then
+      local keep_part = false
+
+      if part:is_text() and not part:is_attachment() then
+        local length = part:get_length()
+        if length >= min_text_size and length <= max_text_size then
+          keep_part = true
+        end
+      elseif keep_images and part:is_image() then
+        local cd = part:get_header('Content-Disposition')
+        -- Anchor the match to the disposition type itself so that a value
+        -- such as `attachment; filename="inline.png"` is not taken as inline
+        if cd and cd:lower():match('^%s*inline') then
+          keep_part = true
+        end
+      end
+
+      if keep_part then
+        parts_indexes_to_keep[i] = true
+      else
+        has_attachments = true
+      end
+    end
+  end
+
+  -- If no attachments found, return false to indicate that no alterations are required
+  if not has_attachments then
+    return false
+  end
+
+  -- Second pass: reconstruct message
+  for i, part in ipairs(parts) do
+    local boundary = part:get_boundary()
+    if part:is_multipart() then
+      if cur_boundary then
+        -- Nested container: open a new section inside the enclosing one
+        out[#out + 1] = { string.format('--%s',
+            boundaries[#boundaries]), true }
+      end
+
+      boundaries[#boundaries + 1] = boundary or '--XXX'
+      cur_boundary = boundary
+
+      local rh = part:get_raw_headers()
+      if #rh > 0 then
+        out[#out + 1] = { rh, true }
+      end
+    elseif part:is_message() then
+      if boundary then
+        if cur_boundary and boundary ~= cur_boundary then
+          -- Need to close boundary
+          out[#out + 1] = { string.format('--%s--',
+              boundaries[#boundaries]), true }
+          table.remove(boundaries)
+          cur_boundary = nil
+        end
+        out[#out + 1] = { string.format('--%s',
+            boundary), true }
+      end
+
+      out[#out + 1] = { part:get_raw_headers(), true }
+    else
+      if parts_indexes_to_keep[i] then
+        if boundary then
+          if cur_boundary and boundary ~= cur_boundary then
+            -- Need to close previous boundary
+            out[#out + 1] = { string.format('--%s--',
+                boundaries[#boundaries]), true }
+            table.remove(boundaries)
+            cur_boundary = boundary
+          end
+          out[#out + 1] = { string.format('--%s',
+              boundary), true }
+        end
+        -- Add part headers
+        local headers = part:get_raw_headers()
+
+        if headers then
+          out[#out + 1] = {
+            headers,
+            true
+          }
+        end
+
+        -- Add content (no newline is requested after the raw content)
+        out[#out + 1] = {
+          part:get_raw_content(),
+          false
+        }
+      end
+
+    end
+  end
+
+  -- Close remaining boundaries, innermost first
+  local b = table.remove(boundaries)
+  while b do
+    out[#out + 1] = { string.format('--%s--', b), true }
+    if #boundaries > 0 then
+      out[#out + 1] = { '', true }
+    end
+    b = table.remove(boundaries)
+  end
+
+  state.out = out
+
+  return state
+end
+
return exports
diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua
index 6a589d66a..7750c5a78 100644
--- a/lualib/rspamadm/mime.lua
+++ b/lualib/rspamadm/mime.lua
@@ -160,6 +160,25 @@ modify:option "-H --html-footer"
:description "Adds footer to text/html parts from a specific file"
:argname "<file>"
+-- `strip` sub-command: declares its positional file list and the options
+-- that are later read by the handler as opts.file, opts.keep_images,
+-- opts.min_text_size and opts.max_text_size
+local strip = parser:command("strip")
+  :description("Strip attachments from a message")
+-- One or more input files
+strip:argument("file")
+  :description("File to process")
+  :argname("<file>")
+  :args("+")
+strip:flag("-i --keep-images")
+  :description("Keep images")
+-- Size limits are converted with tonumber
+strip:option("--min-text-size")
+  :description("Minimal text size to keep")
+  :argname("<size>")
+  :convert(tonumber)
+  :default(0)
+strip:option("--max-text-size")
+  :description("Max text size to keep")
+  :argname("<size>")
+  :convert(tonumber)
+  :default(math.huge)
+
local sign = parser:command "sign"
:description "Performs DKIM signing"
sign:argument "file"
@@ -893,6 +912,62 @@ local function sign_handler(opts)
end
end
+-- Handler of the `strip` command: for every file in opts.file it loads the
+-- message, asks lua_mime.remove_attachments to strip it and writes the
+-- message headers followed by the (possibly rewritten) body.
+-- @param opts parsed command line options; reads opts.file, opts.keep_images,
+-- opts.min_text_size and opts.max_text_size
+local function strip_handler(opts)
+  load_config(opts)
+  rspamd_url.init(rspamd_config:get_tld_path())
+
+  for _, fname in ipairs(opts.file) do
+    local task = load_task(opts, fname)
+    local eol = newline(task)
+
+    local strip_settings = {
+      keep_images = opts.keep_images,
+      min_text_size = opts.min_text_size,
+      max_text_size = opts.max_text_size
+    }
+    -- remove_attachments may return a false value; treat that as "no rewrite"
+    local stripped = lua_mime.remove_attachments(task, strip_settings) or {}
+
+    -- Output chunks: plain strings or { data, add_newline } pairs
+    local chunks = {}
+
+    -- Start with headers, each one without its trailing line break
+    task:headers_foreach(function(_, hdr)
+      local raw = hdr.raw:gsub('\r?\n?$', '')
+      chunks[#chunks + 1] = raw
+    end, { full = true })
+
+    -- End of headers
+    chunks[#chunks + 1] = ''
+
+    if stripped.out then
+      for _, piece in ipairs(stripped.out) do
+        chunks[#chunks + 1] = piece
+      end
+    else
+      -- Nothing was rewritten: emit the original raw body
+      chunks[#chunks + 1] = { task:get_rawbody(), false }
+    end
+
+    for _, chunk in ipairs(chunks) do
+      local kind = type(chunk)
+
+      if kind == 'string' then
+        io.write(chunk, eol)
+      elseif kind == 'table' then
+        -- Flush buffered output first: non-string data is written straight to
+        -- file descriptor 1 (presumably stdout), bypassing the io buffer
+        io.flush()
+        local payload, want_eol = chunk[1], chunk[2]
+
+        if type(payload) == 'string' then
+          io.write(payload)
+        else
+          payload:save_in_file(1)
+        end
+
+        if want_eol then
+          io.write(eol)
+        end
+      else
+        chunk:save_in_file(1)
+        io.write(eol)
+      end
+    end
+
+    task:destroy() -- No automatic dtor
+  end
+end
+
-- Strips directories and .extensions (if present) from a filepath
local function filename_only(filepath)
local filename = filepath:match(".*%/([^%.]+)")
@@ -995,6 +1070,8 @@ local function handler(args)
urls_handler(opts)
elseif command == 'modify' then
modify_handler(opts)
+ elseif command == 'strip' then
+ strip_handler(opts)
elseif command == 'sign' then
sign_handler(opts)
elseif command == 'dump' then