diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-10-27 15:15:39 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-10-27 15:15:39 +0000 |
commit | 5add81dafbbfd47e089157fae138cf4a5dae7089 (patch) | |
tree | 87aadf5f6aac3f9a048ab2b0b902dfb922e97713 | |
parent | b9f3247f7c893f7d3df95ce1d936c454a7c5ccec (diff) | |
download | rspamd-5add81dafbbfd47e089157fae138cf4a5dae7089.tar.gz rspamd-5add81dafbbfd47e089157fae138cf4a5dae7089.zip |
[Feature] Lua_mime: Add ability to do multipattern replacement
-rw-r--r-- | lualib/lua_mime.lua | 250 |
1 files changed, 250 insertions, 0 deletions
-- Patch hunk for lualib/lua_mime.lua (index 1f0fb38f2..5088f768d,
-- originally @@ -249,6 +249,256 @@): the definitions below are inserted
-- between the end of `exports.add_text_footer` and the
-- `exports.full_extensions_map` table ("All mime extensions with
-- corresponding content types").

-- Appends `part` to `out` untouched: raw headers first, raw body second.
-- Each entry is a {data, flag} pair; the flag is interpreted by whoever
-- consumes `state.out` (its meaning is not visible in this hunk).
local function mp_append_raw_part(part, out)
  out[#out + 1] = {part:get_raw_headers(), true}
  out[#out + 1] = {part:get_raw_content(), false}
end

-- Turns the per-pattern result of `mp:match` into a single array of
-- {match, npat} pairs ordered by match start, where match is {start, end}.
-- Matches that share a start position are all widened (in place) to the
-- longest of them and take over the pattern number of that longest match,
-- so whichever one is picked first yields the maximal replacement.
local function mp_flatten_matches(match_pos)
  local flattened = {}
  for npat, matches in pairs(match_pos) do
    for _, m in ipairs(matches) do
      flattened[#flattened + 1] = {m, npat}
    end
  end

  table.sort(flattened, function(m1, m2) return m1[1][1] < m2[1][1] end)

  -- Walk every run of equal start positions exactly once
  local n = #flattened
  local i = 1
  while i <= n do
    local st = flattened[i][1][1]
    local e = flattened[i][1][2]
    local max_npat = flattened[i][2]

    -- Find the longest match of the run
    local j = i + 1
    while j <= n and flattened[j][1][1] == st do
      if flattened[j][1][2] > e then
        e = flattened[j][1][2]
        max_npat = flattened[j][2]
      end
      j = j + 1
    end

    -- Propagate it to the shorter members of the run
    for k = i, j - 1 do
      if e > flattened[k][1][2] then
        flattened[k][1][2] = e
        flattened[k][2] = max_npat
      end
    end

    i = j
  end

  return flattened
end

-- Rewrites one text part: every multipattern match found in the part's
-- `raw_utf` content is substituted by `replacements[npat]` and the result is
-- appended to `out`. Parts without any match are appended unmodified.
--
-- task: task object (used for the newline type of quoted-printable encoding)
-- part: mime part to process; must provide a text part via `get_text()`
-- mp: multipattern object, queried as `mp:match(content, true)`
-- replacements: table indexed by pattern number
-- is_multipart: truthy if the part lives inside a multipart container, in
--   which case fresh Content-Type / Content-Transfer-Encoding headers are
--   emitted for the rewritten part; otherwise the new encoding is reported
--   through `state.new_cte`
-- out: array receiving the output fragments
-- state: shared state; reads `newline_s`, may set `new_cte`
--
-- NOTE(review): match positions are treated as 1-based with an inclusive
-- end, exactly as the original hunk did (`cur_start = end + 1`) - confirm
-- against the multipattern implementation.
local function do_replacement (task, part, mp, replacements,
                               is_multipart, out, state)

  local tp = part:get_text()
  local ct = 'text/plain'
  local cte = 'quoted-printable'
  local newline_s = state.newline_s

  if tp:is_html() then
    ct = 'text/html'
  end

  local encode_func = function(input)
    return rspamd_util.encode_qp(input, 80, task:get_newlines_type())
  end

  if part:get_cte() == '7bit' then
    -- 7bit content is passed through without re-encoding
    cte = '7bit'
    encode_func = function(input)
      if type(input) == 'userdata' then
        return input
      else
        return rspamd_text.fromstring(input)
      end
    end
  end

  local content = tp:get_content('raw_utf') or rspamd_text.fromstring('')
  local match_pos = mp:match(content, true)

  if not match_pos then
    -- No matches
    mp_append_raw_part(part, out)
    return
  end

  local matches_flattened = mp_flatten_matches(match_pos)

  -- Handle the case of empty match
  if #matches_flattened == 0 then
    mp_append_raw_part(part, out)
    return
  end

  -- Only now is it certain that the part is rewritten, so only now may the
  -- headers/CTE be replaced: doing it earlier would pair new headers with
  -- the original raw headers and raw (possibly differently encoded) body.
  if is_multipart then
    out[#out + 1] = string.format('Content-Type: %s; charset=utf-8%s'..
        'Content-Transfer-Encoding: %s',
        ct, newline_s, cte)
    out[#out + 1] = ''
  else
    state.new_cte = cte
  end

  local cur_start = 1
  local fragments = {}
  for _, m in ipairs(matches_flattened) do
    local m_start = m[1][1]
    -- `>=` so that a match at the very beginning of the text, or one that
    -- directly follows the previous match, is replaced as well; matches
    -- starting inside an already replaced region are dropped.
    if m_start >= cur_start then
      if m_start > cur_start then
        -- Untouched text between the previous match and this one
        fragments[#fragments + 1] = content:span(cur_start, m_start - cur_start)
      end
      fragments[#fragments + 1] = replacements[m[2]]
      cur_start = m[1][2] + 1 -- first position after the match
    end
  end

  -- last part; `<=` keeps a single trailing byte after the final match
  if cur_start <= #content then
    fragments[#fragments + 1] = content:span(cur_start)
  end

  -- Final stuff
  out[#out + 1] = {encode_func(rspamd_text.fromtable(fragments)), true}
  out[#out + 1] = ''
end

--[[[
-- @function lua_mime.multipattern_text_replace(task, mp, replacements)
-- Replaces text according to multipattern matches. Attachments, non-text parts
-- and children of multipart/signed are copied as is. It returns `false` if `mp`
-- is not set or the task has no text parts, otherwise a table with the following
-- fields:
-- * out: new content (body only)
-- * need_rewrite_ct: boolean field that means if we must rewrite content type
-- * new_ct: new content type (type => string, subtype => string)
-- * new_cte: new content-transfer encoding (string)
-- * newline_s: newline sequence used for the generated content
--]]
exports.multipattern_text_replace = function(task, mp, replacements)
  local newline_s = newline(task)
  local state = {
    newline_s = newline_s
  }
  local out = {}
  local text_parts = task:get_text_parts()

  if not mp or not (text_parts and #text_parts > 0) then
    return false
  end

  -- We need to take extra care about content-type and cte
  local ct = task:get_header('Content-Type')
  if ct then
    ct = rspamd_util.parse_content_type(ct, task:get_mempool())
  end

  if ct then
    if ct.type == 'text' then
      state.need_rewrite_ct = true
      state.new_ct = ct
    end
  elseif #text_parts == 1 then
    -- No explicit CT, need to guess
    state.need_rewrite_ct = true
    state.new_ct = {
      type = 'text',
      subtype = 'plain'
    }
  else
    -- XXX: in fact, it cannot be
    state.new_ct = {
      type = 'multipart',
      subtype = 'mixed'
    }
  end

  -- Stack of currently open multipart boundaries
  local boundaries = {}
  local cur_boundary
  for _, part in ipairs(task:get_parts()) do
    local boundary = part:get_boundary()
    if part:is_multipart() then
      if cur_boundary then
        -- Nested container: it is itself a child of the enclosing multipart
        out[#out + 1] = string.format('--%s',
            boundaries[#boundaries])
      end

      boundaries[#boundaries + 1] = boundary or '--XXX'
      cur_boundary = boundary

      local rh = part:get_raw_headers()
      if #rh > 0 then
        out[#out + 1] = {rh, true}
      end
    elseif part:is_message() then
      if boundary then
        if cur_boundary and boundary ~= cur_boundary then
          -- Need to close boundary
          out[#out + 1] = string.format('--%s--%s',
              boundaries[#boundaries], newline_s)
          table.remove(boundaries)
          cur_boundary = nil
        end
        out[#out + 1] = string.format('--%s',
            boundary)
      end

      out[#out + 1] = {part:get_raw_headers(), true}
    else
      local skip_replacement = part:is_attachment()

      local parent = part:get_parent()
      if parent then
        local t, st = parent:get_type()

        if t == 'multipart' and st == 'signed' then
          -- Do not modify signed parts
          skip_replacement = true
        end
      end
      if not part:is_text() then
        skip_replacement = true
      end

      if boundary then
        if cur_boundary and boundary ~= cur_boundary then
          -- Need to close boundary
          out[#out + 1] = string.format('--%s--%s',
              boundaries[#boundaries], newline_s)
          table.remove(boundaries)
          cur_boundary = boundary
        end
        out[#out + 1] = string.format('--%s',
            boundary)
      end

      if not skip_replacement then
        do_replacement(task, part, mp, replacements,
            parent and parent:is_multipart(), out, state)
      else
        -- Append as is
        out[#out + 1] = {part:get_raw_headers(), true}
        out[#out + 1] = {part:get_raw_content(), false}
      end
    end
  end

  -- Close remaining
  local b = table.remove(boundaries)
  while b do
    out[#out + 1] = string.format('--%s--', b)
    if #boundaries > 0 then
      out[#out + 1] = ''
    end
    b = table.remove(boundaries)
  end

  state.out = out

  return state
end