diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-11-24 09:35:18 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-11-25 12:35:06 +0000 |
commit | 9744716ba7e00681a7dbe7091cdb6ecd421ad32b (patch) | |
tree | b29fdbcfe6d2fc8bf2d300dc2e8a148e5f2a2b01 | |
parent | 9374c19e58b3743f96b6b13e88d1366660cee696 (diff) | |
download | rspamd-9744716ba7e00681a7dbe7091cdb6ecd421ad32b.tar.gz rspamd-9744716ba7e00681a7dbe7091cdb6ecd421ad32b.zip |
[Project] Further content module work
-rw-r--r-- | lualib/lua_content/ical.lua | 9 | ||||
-rw-r--r-- | lualib/lua_content/init.lua | 73 | ||||
-rw-r--r-- | src/libmime/message.c | 113 |
3 files changed, 118 insertions, 77 deletions
diff --git a/lualib/lua_content/ical.lua b/lualib/lua_content/ical.lua index 4f6b61919..c19723614 100644 --- a/lualib/lua_content/ical.lua +++ b/lualib/lua_content/ical.lua @@ -15,6 +15,7 @@ limitations under the License. ]]-- local l = require 'lpeg' +local rspamd_text = require "rspamd_text" local wsp = l.P" " local crlf = l.P"\r"^-1 * l.P"\n" @@ -25,7 +26,7 @@ local elt = name * ":" * wsp^0 * value * eol local exports = {} -local function ical_txt_values(input) +local function process_ical(input, _, _) local control={n='\n', r='\r'} local escaper = l.Ct((elt / function(_,b) return (b:gsub("\\(.)", control)) end)^1) @@ -35,13 +36,13 @@ local function ical_txt_values(input) return nil end - return table.concat(values, "\n") + return rspamd_text.fromtable(values, "\n") end --[[[ --- @function lua_ical.ical_txt_values(input) +-- @function lua_ical.process(input) -- Returns all values from ical as a plain text. Names are completely ignored. --]] -exports.ical_txt_values = ical_txt_values +exports.process = process_ical return exports
\ No newline at end of file diff --git a/lualib/lua_content/init.lua b/lualib/lua_content/init.lua index a96852139..994d613f4 100644 --- a/lualib/lua_content/init.lua +++ b/lualib/lua_content/init.lua @@ -17,4 +17,75 @@ limitations under the License. --[[[ -- @module lua_content -- This module contains content processing logic ---]]
\ No newline at end of file +--]] + + +local exports = {} +local N = "lua_content" +local lua_util = require "lua_util" + +local content_modules = { + ical = { + mime_type = "text/calendar", + module = require "lua_content/ical", + extensions = {'ical'}, + output = "text" + }, +} + +local modules_by_mime_type +local modules_by_extension + +local function init() + modules_by_mime_type = {} + modules_by_extension = {} + for k,v in pairs(content_modules) do + if v.mime_type then + modules_by_mime_type[v.mime_type] = {k, v} + end + if v.extensions then + for _,ext in ipairs(v.extensions) do + modules_by_extension[ext] = {k, v} + end + end + end +end + +exports.maybe_process_mime_part = function(part, log_obj) + if not modules_by_mime_type then + init() + end + + local ctype, csubtype = part:get_type() + local mt = string.format("%s/%s", ctype or 'application', + csubtype or 'octet-stream') + local pair = modules_by_mime_type[mt] + + if not pair then + local ext = part:get_detected_ext() + + if ext then + pair = modules_by_extension[ext] + end + end + + if pair then + lua_util.debugm(N, log_obj, "found known content of type %s: %s", + mt, pair[1]) + + local data = pair[2].module.process(part:get_content(), part, log_obj) + + if data then + lua_util.debugm(N, log_obj, "extracted content from %s: %s type", + pair[1], type(data)) + part:set_specific(data) + else + lua_util.debugm(N, log_obj, "failed to extract anything from %s", + pair[1]) + end + end + +end + + +return exports
\ No newline at end of file diff --git a/src/libmime/message.c b/src/libmime/message.c index 7d2d81a7f..53c3cce27 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -694,71 +694,8 @@ rspamd_message_process_plain_text_part (struct rspamd_task *task, rspamd_mime_text_part_maybe_convert (task, text_part); if (text_part->utf_raw_content != NULL) { - /* Check for ical */ - rspamd_ftok_t cal_ct; - - /* - * TODO: If we want to process more than that, we need - * to create some generic framework that accepts a part - * and returns a processed data - */ - RSPAMD_FTOK_ASSIGN (&cal_ct, "calendar"); - - if (rspamd_ftok_casecmp (&cal_ct, &text_part->mime_part->ct->subtype) == 0) { - lua_State *L = task->cfg->lua_state; - gint err_idx; - - lua_pushcfunction (L, &rspamd_lua_traceback); - err_idx = lua_gettop (L); - - /* Obtain function */ - if (!rspamd_lua_require_function (L, "lua_ical", "ical_txt_values")) { - msg_err_task ("cannot require lua_ical.ical_txt_values"); - lua_settop (L, err_idx - 1); - - return FALSE; - } - - lua_pushlstring (L, text_part->utf_raw_content->data, - text_part->utf_raw_content->len); - - if (lua_pcall (L, 1, 1, err_idx) != 0) { - msg_err_task ("cannot call lua lua_ical.ical_txt_values: %s", - lua_tostring (L, -1)); - lua_settop (L, err_idx - 1); - - return FALSE; - } - - if (lua_type (L, -1) == LUA_TSTRING) { - const char *ndata; - gsize nsize; - - ndata = lua_tolstring (L, -1, &nsize); - text_part->utf_content = g_byte_array_sized_new (nsize); - g_byte_array_append (text_part->utf_content, ndata, nsize); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t) free_byte_array_callback, - text_part->utf_content); - } - else if (lua_type (L, -1) == LUA_TNIL) { - msg_info_task ("cannot convert text/calendar to plain text"); - text_part->utf_content = text_part->utf_raw_content; - } - else { - msg_err_task ("invalid return type when calling lua_ical.ical_txt_values: %s", - lua_typename (L, lua_type (L, -1))); - lua_settop (L, err_idx - 1); - - return FALSE; - } - - lua_settop (L, err_idx - 1); - } - else { - /* Just have the same content */ - text_part->utf_content = text_part->utf_raw_content; - } + /* Just have the same content */ + text_part->utf_content = text_part->utf_raw_content; } else { /* @@ -1378,7 +1315,7 @@ rspamd_message_process (struct rspamd_task *task) guint tw, *ptw, dw; struct rspamd_mime_part *part; lua_State *L = NULL; - gint func_pos = -1; + gint magic_func_pos = -1, content_func_pos = -1, old_top = -1; if (task->cfg) { L = task->cfg->lua_state; @@ -1386,20 +1323,32 @@ rspamd_message_process (struct rspamd_task *task) rspamd_archives_process (task); + if (L) { + old_top = lua_gettop (L); + } + if (L && rspamd_lua_require_function (L, "lua_magic", "detect_mime_part")) { - func_pos = lua_gettop (L); + magic_func_pos = lua_gettop (L); } else { msg_err_task ("cannot require lua_magic.detect_mime_part"); } + if (L && rspamd_lua_require_function (L, + "lua_content", "maybe_process_mime_part")) { + content_func_pos = lua_gettop (L); + } + else { + msg_err_task ("cannot require lua_content.maybe_process_mime_part"); + } + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { - if (func_pos != -1 && part->parsed_data.len > 0) { + if (magic_func_pos != -1 && part->parsed_data.len > 0) { struct rspamd_mime_part **pmime; struct rspamd_task **ptask; - lua_pushvalue (L, func_pos); + lua_pushvalue (L, magic_func_pos); pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *)); rspamd_lua_setclass (L, "rspamd{mimepart}", -1); *pmime = part; @@ -1447,7 +1396,27 @@ rspamd_message_process (struct rspamd_task *task) } } - lua_settop (L, func_pos); + lua_settop (L, magic_func_pos); + } + + /* Now detect content */ + if (content_func_pos != -1 && part->parsed_data.len > 0) { + struct rspamd_mime_part **pmime; + struct rspamd_task **ptask; + + lua_pushvalue (L, content_func_pos); + pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *)); + rspamd_lua_setclass (L, "rspamd{mimepart}", -1); + *pmime = part; + ptask = lua_newuserdata (L, sizeof (struct rspamd_task *)); + rspamd_lua_setclass (L, "rspamd{task}", -1); + *ptask = task; + + if (lua_pcall (L, 2, 2, 0) != 0) { + msg_err_task ("cannot detect content: %s", lua_tostring (L, -1)); + } + + lua_settop (L, magic_func_pos); } if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) { @@ -1455,8 +1424,8 @@ rspamd_message_process (struct rspamd_task *task) } } - if (func_pos != -1) { - lua_settop (L, func_pos - 1); + if (old_top != -1) { + lua_settop (L, old_top); } /* Calculate average words length and number of short words */ |