aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lualib/lua_mime.lua178
1 files changed, 178 insertions, 0 deletions
diff --git a/lualib/lua_mime.lua b/lualib/lua_mime.lua
index 795a803e5..167939189 100644
--- a/lualib/lua_mime.lua
+++ b/lualib/lua_mime.lua
@@ -897,4 +897,182 @@ exports.remove_attachments = function(task, settings)
return state
end
+--[[[
+-- @function lua_mime.anonymize_message(task, settings)
+-- Anonymizes message content by replacing sensitive data
+-- @param {task} task Rspamd task object
+-- @param {table} settings Table with the following fields:
+-- * strip_attachments: boolean, whether to strip all attachments
+-- * custom_header_process: table of header_name => function(orig_header) pairs
+-- @return {table} modified message state similar to other modification functions
+--]]
+exports.anonymize_message = function(task, settings)
+ local newline_s = newline(task)
+ local state = {
+ newline_s = newline_s
+ }
+ local out = {}
+
+ -- Default header processors
+ local function anonymize_email_header(hdr)
+ local addrs = rspamd_util.parse_mail_address(hdr.value, task:get_mempool())
+ if addrs and addrs[1] then
+ local modified = {}
+ for _, addr in ipairs(addrs) do
+ table.insert(modified, string.format('anonymous@%s', addr.domain or 'example.com'))
+ end
+
+ return table.concat(modified, ',')
+ end
+ return 'anonymous@example.com'
+ end
+
+ local function anonymize_received_header(hdr)
+ local processed = string.gsub(hdr.value, '%d+%.%d+%.%d+%.%d+', 'x.x.x.x')
+ processed = string.gsub(processed, '%x+:%x+:%x+:%x+:%x+:%x+:%x+:%x+', 'x:x:x:x:x:x:x:x')
+ return processed
+ end
+
+ local default_header_process = {
+ ['from'] = anonymize_email_header,
+ ['to'] = anonymize_email_header,
+ ['cc'] = anonymize_email_header,
+ ['bcc'] = anonymize_email_header,
+ ['received'] = anonymize_received_header,
+ }
+
+ -- Merge with custom processors
+ local header_processors = settings.custom_header_process or {}
+ for k, v in pairs(default_header_process) do
+ if not header_processors[k] then
+ header_processors[k] = v
+ end
+ end
+
+ -- Process headers
+ local modified_headers = {}
+ for name, processor in pairs(header_processors) do
+ local hdrs = task:get_header_full(name, true)
+ if hdrs then
+ for _, hdr in ipairs(hdrs) do
+ local new_value = processor(hdr)
+ if new_value then
+ table.insert(modified_headers, {
+ name = name,
+ value = new_value
+ })
+ end
+ end
+ end
+ end
+
+ -- Create new text content
+ local text_content = {}
+ local urls = {}
+ local emails = {}
+
+ -- Extract text content, URLs and emails
+ local text_parts = task:get_text_parts()
+ for _, part in ipairs(text_parts) do
+ if part:is_html() then
+ local words = part:get_words('norm')
+ if words then
+ text_content = words
+ end
+ break -- Use only first HTML part
+ end
+ end
+
+ -- If no HTML parts found, use first text part
+ if #text_content == 0 then
+ for _, part in ipairs(text_parts) do
+ if not part:is_html() then
+ local words = part:get_words('norm')
+ if words then
+ text_content = words
+ end
+ break
+ end
+ end
+ end
+
+ -- Process URLs
+ local function process_url(url)
+ local clean_url = url:get_host()
+ local path = url:get_path()
+ if path and path ~= "/" then
+ clean_url = clean_url .. path
+ end
+ return string.format('https://%s', clean_url)
+ end
+
+ for _, url in ipairs(task:get_urls(true)) do
+ table.insert(urls, process_url(url))
+ end
+
+ -- Process emails
+ local function process_email(email)
+ return string.format('nobody@%s', email.domain or 'example.com')
+ end
+
+ for _, email in ipairs(task:get_emails()) do
+ table.insert(emails, process_email(email))
+ end
+
+ -- Construct new message
+ table.insert(text_content, '\nurls: ')
+ table.insert(text_content, table.concat(urls, ', '))
+ table.insert(text_content, '\nemails: ')
+ table.insert(text_content, table.concat(emails, ', '))
+ local new_text = table.concat(text_content, ' ')
+
+ -- Create new message structure
+ local boundaries = {}
+ local cur_boundary = '--XXX'
+ boundaries[1] = cur_boundary
+
+ -- Add headers
+ out[#out + 1] = {
+ string.format('Content-Type: multipart/mixed; boundary="%s"', cur_boundary),
+ true
+ }
+ for _, hdr in ipairs(modified_headers) do
+ out[#out + 1] = {
+ string.format('%s: %s', hdr.name, hdr.value),
+ true
+ }
+ end
+ out[#out + 1] = { '', true }
+
+ -- Add text part
+ out[#out + 1] = {
+ string.format('--%s', cur_boundary),
+ true
+ }
+ out[#out + 1] = {
+ 'Content-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: quoted-printable',
+ true
+ }
+ out[#out + 1] = { '', true }
+ out[#out + 1] = {
+ rspamd_util.encode_qp(new_text, 76, task:get_newlines_type()),
+ false
+ }
+
+ -- Close boundaries
+ out[#out + 1] = {
+ string.format('--%s--', cur_boundary),
+ true
+ }
+
+ state.out = out
+ state.need_rewrite_ct = true
+ state.new_ct = {
+ type = 'multipart',
+ subtype = 'mixed'
+ }
+
+ return state
+end
+
return exports