aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rspamd.com> 2024-12-11 13:55:03 +0000
committer Vsevolod Stakhov <vsevolod@rspamd.com> 2024-12-11 13:55:03 +0000
commit 7e6bec3c411cf368f37c1701141a5f65895b741a (patch)
tree aedb460be0ec8b5bbec6b271656af979ddeb25c1
parent b0e0367901b4223f68a6de8edf1fede2c7eedcd0 (diff)
download rspamd-7e6bec3c411cf368f37c1701141a5f65895b741a.tar.gz
rspamd-7e6bec3c411cf368f37c1701141a5f65895b741a.zip
[Rework] Multimap should use only distinct text parts for content matching
Issue: #5248
-rw-r--r-- lualib/lua_mime.lua 43
-rw-r--r-- src/plugins/lua/multimap.lua 7
2 files changed, 47 insertions, 3 deletions
diff --git a/lualib/lua_mime.lua b/lualib/lua_mime.lua
index 1135f2b63..f68758ec9 100644
--- a/lualib/lua_mime.lua
+++ b/lualib/lua_mime.lua
@@ -957,6 +957,49 @@ exports.get_displayed_text_part = function(task)
end
--[[[
+-- @function lua_mime.get_distinct_text_parts(task)
+-- Returns the text parts of a task. When the `parts_distance` pool variable is
+-- present and not greater than 0.5, the last non-attachment, non-HTML part is
+-- removed from the returned list
+-- @param {task} task Rspamd task object
+-- @return {table} array of {text_part} (empty table if the task has none)
+--]]
+exports.get_distinct_text_parts = function(task)
+  local parts = task:get_text_parts()
+  if not parts then
+    return {}
+  end
+
+  -- No recorded distance: nothing to deduplicate against
+  local parts_distance = task:get_mempool():get_variable('parts_distance', 'double')
+  if not parts_distance then
+    return parts
+  end
+
+  if tonumber(parts_distance) > 0.5 then
+    -- Parts are considered distinct, keep all of them
+    return parts
+  end
+
+  -- Parts are similar: remember the index of the last inline (non-attachment)
+  -- part that is not HTML, i.e. the text part assumed to mirror the HTML one
+  local redundant_idx
+  for idx, candidate in ipairs(parts) do
+    local is_inline = not candidate:get_mimepart():is_attachment()
+    if is_inline and not candidate:is_html() then
+      redundant_idx = idx
+    end
+  end
+
+  if redundant_idx then
+    -- table.remove shifts the following parts down, so no hole is left
+    table.remove(parts, redundant_idx)
+  end
+
+  return parts
+end
+
+--[[[
-- @function lua_mime.anonymize_message(task, settings)
-- Anonymizes message content by replacing sensitive data
-- @param {task} task Rspamd task object
diff --git a/src/plugins/lua/multimap.lua b/src/plugins/lua/multimap.lua
index e852ce15e..a61da606b 100644
--- a/src/plugins/lua/multimap.lua
+++ b/src/plugins/lua/multimap.lua
@@ -29,6 +29,7 @@ local rspamd_ip = require "rspamd_ip"
local lua_util = require "lua_util"
local lua_selectors = require "lua_selectors"
local lua_maps = require "lua_maps"
+local lua_mime = require "lua_mime"
local redis_params
local fun = require "fun"
local N = 'multimap'
@@ -453,19 +454,19 @@ local function apply_content_filter(task, filter)
return { task:get_raw_headers() }
elseif filter == 'text' then
local ret = {}
- for _, p in ipairs(task:get_text_parts()) do
+ for _, p in ipairs(lua_mime.get_distinct_text_parts(task)) do
table.insert(ret, p:get_content())
end
return ret
elseif filter == 'rawtext' then
local ret = {}
- for _, p in ipairs(task:get_text_parts()) do
+ for _, p in ipairs(lua_mime.get_distinct_text_parts(task)) do
table.insert(ret, p:get_content('raw_parsed'))
end
return ret
elseif filter == 'oneline' then
local ret = {}
- for _, p in ipairs(task:get_text_parts()) do
+ for _, p in ipairs(lua_mime.get_distinct_text_parts(task)) do
table.insert(ret, p:get_content_oneline())
end
return ret