From 65f15e69284e38d5bbf2177f4466975eca5779b8 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 3 Feb 2011 20:29:27 +0300 Subject: [PATCH] * New module for checking emails inside messages (rule-based, like multimap) * Emails are now separated from URLs and URL checks * Add ability to check text attachments if the option is present in the configuration Version is 0.3.6 now --- CMakeLists.txt | 2 +- lib/librspamdclient.c | 6 +- src/cfg_file.h | 1 + src/cfg_xml.c | 6 ++ src/lua/lua_common.h | 2 +- src/lua/lua_task.c | 111 ++++++++++++++++++++++- src/main.h | 1 + src/message.c | 4 +- src/plugins/lua/emails.lua | 167 +++++++++++++++++++++++++++++++++++ src/plugins/lua/multimap.lua | 10 +-- src/plugins/lua/trie.lua | 2 - src/url.c | 51 +++++++---- src/url.h | 6 +- src/worker.c | 3 + 14 files changed, 336 insertions(+), 36 deletions(-) create mode 100644 src/plugins/lua/emails.lua diff --git a/CMakeLists.txt b/CMakeLists.txt index d6c3ac576..79cf3b782 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ PROJECT(rspamd C) SET(RSPAMD_VERSION_MAJOR 0) SET(RSPAMD_VERSION_MINOR 3) -SET(RSPAMD_VERSION_PATCH 5) +SET(RSPAMD_VERSION_PATCH 6) SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}") diff --git a/lib/librspamdclient.c b/lib/librspamdclient.c index 53c3dcc0f..2bc45c888 100644 --- a/lib/librspamdclient.c +++ b/lib/librspamdclient.c @@ -606,9 +606,9 @@ parse_rspamd_header_line (struct rspamd_connection *conn, guint len, GError **er } else { /* Create header value */ - hvalue = g_malloc (p - c + 1); - hvalue[p - c] = '\0'; - memcpy (hvalue, c, p - c); + hvalue = g_malloc (p - c + 2); + hvalue[p - c + 1] = '\0'; + memcpy (hvalue, c, p - c + 1); g_hash_table_replace (conn->result->headers, hname, hvalue); state = 99; } diff --git a/src/cfg_file.h b/src/cfg_file.h index 81cfe65cc..e33b1585a 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -252,6 +252,7 @@ struct config_file { gboolean no_fork; /**< if 1 do not call
daemon() */ gboolean config_test; /**< if TRUE do only config file test */ gboolean raw_mode; /**< work in raw mode instead of utf one */ + gboolean check_text_attachements; /**< check text attachements as text */ gboolean convert_config; /**< convert config to XML format */ enum rspamd_log_type log_type; /**< log type */ diff --git a/src/cfg_xml.c b/src/cfg_xml.c index 70f3ebda8..b4eaee045 100644 --- a/src/cfg_xml.c +++ b/src/cfg_xml.c @@ -111,6 +111,12 @@ static struct xml_parser_rule grammar[] = { G_STRUCT_OFFSET (struct config_file, raw_mode), NULL }, + { + "check_attachements", + xml_handle_boolean, + G_STRUCT_OFFSET (struct config_file, check_text_attachements), + NULL + }, { "tempdir", xml_handle_string, diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h index 3e0fbed88..d70501034 100644 --- a/src/lua/lua_common.h +++ b/src/lua/lua_common.h @@ -16,7 +16,7 @@ extern const luaL_reg null_reg[]; -#define RSPAMD_LUA_API_VERSION 1 +#define RSPAMD_LUA_API_VERSION 2 /* Common utility functions */ void lua_newclass (lua_State *L, const gchar *classname, const struct luaL_reg *func); diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 67fd3e7b9..892bbdf07 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -45,6 +45,7 @@ extern stat_file_t* get_statfile_by_symbol (statfile_pool_t *pool, struct classi LUA_FUNCTION_DEF (task, get_message); LUA_FUNCTION_DEF (task, insert_result); LUA_FUNCTION_DEF (task, get_urls); +LUA_FUNCTION_DEF (task, get_emails); LUA_FUNCTION_DEF (task, get_text_parts); LUA_FUNCTION_DEF (task, get_raw_headers); LUA_FUNCTION_DEF (task, get_received_headers); @@ -68,6 +69,7 @@ static const struct luaL_reg tasklib_m[] = { LUA_INTERFACE_DEF (task, get_message), LUA_INTERFACE_DEF (task, insert_result), LUA_INTERFACE_DEF (task, get_urls), + LUA_INTERFACE_DEF (task, get_emails), LUA_INTERFACE_DEF (task, get_text_parts), LUA_INTERFACE_DEF (task, get_raw_headers), LUA_INTERFACE_DEF (task, get_received_headers), @@ -241,6 +243,33 @@ 
lua_task_get_urls (lua_State * L) return 1; } +static gint +lua_task_get_emails (lua_State * L) +{ + gint i = 1; + struct worker_task *task = lua_check_task (L); + GList *cur; + struct uri **purl; + + if (task) { + cur = task->emails; + if (cur != NULL) { + lua_newtable (L); + while (cur) { + purl = lua_newuserdata (L, sizeof (struct uri *)); + lua_setclass (L, "rspamd{url}", -1); + *purl = cur->data; + lua_rawseti (L, -2, i++); + cur = g_list_next (cur); + } + return 1; + } + } + + lua_pushnil (L); + return 1; +} + static gint lua_task_get_text_parts (lua_State * L) { @@ -315,8 +344,14 @@ lua_task_get_received_headers (lua_State * L) struct lua_dns_callback_data { lua_State *L; struct worker_task *task; - const gchar *callback; - const gchar *to_resolve; + const gchar *callback; + const gchar *to_resolve; + gint cbtype; + union { + gpointer string; + gboolean boolean; + gdouble number; + } cbdata; }; static void @@ -385,7 +420,22 @@ lua_dns_callback (struct rspamd_dns_reply *reply, gpointer arg) lua_pushstring (cd->L, dns_strerror (reply->code)); } - if (lua_pcall (cd->L, 4, 0, 0) != 0) { + switch (cd->cbtype) { + case LUA_TBOOLEAN: + lua_pushboolean (cd->L, cd->cbdata.boolean); + break; + case LUA_TNUMBER: + lua_pushnumber (cd->L, cd->cbdata.number); + break; + case LUA_TSTRING: + lua_pushstring (cd->L, cd->cbdata.string); + break; + default: + lua_pushnil (cd->L); + break; + } + + if (lua_pcall (cd->L, 5, 0, 0) != 0) { msg_info ("call to %s failed: %s", cd->callback, lua_tostring (cd->L, -1)); } @@ -409,6 +459,25 @@ lua_task_resolve_dns_a (lua_State * L) cd->L = L; cd->to_resolve = memory_pool_strdup (task->task_pool, luaL_checkstring (L, 2)); cd->callback = memory_pool_strdup (task->task_pool, luaL_checkstring (L, 3)); + cd->cbtype = lua_type (L, 4); + if (cd->cbtype != LUA_TNONE && cd->cbtype != LUA_TNIL) { + switch (cd->cbtype) { + case LUA_TBOOLEAN: + cd->cbdata.boolean = lua_toboolean (L, 4); + break; + case LUA_TNUMBER: + cd->cbdata.number = lua_tonumber 
(L, 4); + break; + case LUA_TSTRING: + cd->cbdata.string = memory_pool_strdup (task->task_pool, lua_tostring (L, 4)); + break; + default: + msg_warn ("cannot handle type %s as callback data", lua_typename (L, cd->cbtype)); + cd->cbtype = LUA_TNONE; + break; + } + } + if (!cd->to_resolve || !cd->callback) { msg_info ("invalid parameters passed to function"); return 0; @@ -432,6 +501,24 @@ lua_task_resolve_dns_txt (lua_State * L) cd->L = L; cd->to_resolve = memory_pool_strdup (task->task_pool, luaL_checkstring (L, 2)); cd->callback = memory_pool_strdup (task->task_pool, luaL_checkstring (L, 3)); + cd->cbtype = lua_type (L, 4); + if (cd->cbtype != LUA_TNONE && cd->cbtype != LUA_TNIL) { + switch (cd->cbtype) { + case LUA_TBOOLEAN: + cd->cbdata.boolean = lua_toboolean (L, 4); + break; + case LUA_TNUMBER: + cd->cbdata.number = lua_tonumber (L, 4); + break; + case LUA_TSTRING: + cd->cbdata.string = memory_pool_strdup (task->task_pool, lua_tostring (L, 4)); + break; + default: + msg_warn ("cannot handle type %s as callback data", lua_typename (L, cd->cbtype)); + cd->cbtype = LUA_TNONE; + break; + } + } if (!cd->to_resolve || !cd->callback) { msg_info ("invalid parameters passed to function"); return 0; @@ -456,6 +543,24 @@ lua_task_resolve_dns_ptr (lua_State * L) cd->L = L; cd->to_resolve = memory_pool_strdup (task->task_pool, luaL_checkstring (L, 2)); cd->callback = memory_pool_strdup (task->task_pool, luaL_checkstring (L, 3)); + cd->cbtype = lua_type (L, 4); + if (cd->cbtype != LUA_TNONE && cd->cbtype != LUA_TNIL) { + switch (cd->cbtype) { + case LUA_TBOOLEAN: + cd->cbdata.boolean = lua_toboolean (L, 4); + break; + case LUA_TNUMBER: + cd->cbdata.number = lua_tonumber (L, 4); + break; + case LUA_TSTRING: + cd->cbdata.string = memory_pool_strdup (task->task_pool, lua_tostring (L, 4)); + break; + default: + msg_warn ("cannot handle type %s as callback data", lua_typename (L, cd->cbtype)); + cd->cbtype = LUA_TNONE; + break; + } + } ina = memory_pool_alloc (task->task_pool, 
sizeof (struct in_addr)); if (!cd->to_resolve || !cd->callback || !inet_aton (cd->to_resolve, ina)) { msg_info ("invalid parameters passed to function"); diff --git a/src/main.h b/src/main.h index b934ae31e..9269d4ca3 100644 --- a/src/main.h +++ b/src/main.h @@ -204,6 +204,7 @@ struct worker_task { gchar *raw_headers; /**< list of raw headers */ GList *received; /**< list of received headers */ GList *urls; /**< list of parsed urls */ + GList *emails; /**< list of parsed emails */ GList *images; /**< list of images */ GHashTable *results; /**< hash table of metric_result indexed by * metric's name */ diff --git a/src/message.c b/src/message.c index 8e8b8feb0..010edf22b 100644 --- a/src/message.c +++ b/src/message.c @@ -536,13 +536,13 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont /* Skip attachements */ #ifndef GMIME24 cd = g_mime_part_get_content_disposition (GMIME_PART (part)); - if (cd && g_ascii_strcasecmp (cd, "attachment") == 0) { + if (cd && g_ascii_strcasecmp (cd, "attachment") == 0 && !task->cfg->check_text_attachements) { debug_task ("skip attachments for checking as text parts"); return; } #else cd = g_mime_object_get_disposition (GMIME_OBJECT (part)); - if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0) { + if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0 && !task->cfg->check_text_attachements) { debug_task ("skip attachments for checking as text parts"); return; } diff --git a/src/plugins/lua/emails.lua b/src/plugins/lua/emails.lua new file mode 100644 index 000000000..969c762ec --- /dev/null +++ b/src/plugins/lua/emails.lua @@ -0,0 +1,167 @@ +-- Emails is module for different checks for emails inside messages + +-- Rules format: +-- symbol = sym, map = file:///path/to/file, domain_only = yes +-- symbol = sym2, dnsbl = bl.somehost.com, domain_only = no +local rules = {} + +function split(str, delim, maxNb) + -- Eliminate bad cases... 
+ if string.find(str, delim) == nil then + return { str } + end + if maxNb == nil or maxNb < 1 then + maxNb = 0 -- No limit + end + local result = {} + local pat = "(.-)" .. delim .. "()" + local nb = 0 + local lastPos + for part, pos in string.gfind(str, pat) do + nb = nb + 1 + result[nb] = part + lastPos = pos + if nb == maxNb then break end + end + -- Handle the last field + if nb ~= maxNb then + result[nb + 1] = string.sub(str, lastPos) + end + return result +end + +function emails_dns_cb(task, to_resolve, results, err, symbol) + if results then + task:insert_result(symbol, 1) + end +end + +-- Check rule for a single email +function check_email_rule(task, rule, addr) + if rule['dnsbl'] then + local to_resolve = '' + if rule['domain_only'] then + to_resolve = string.format('%s.%s', addr:get_host(), rule['dnsbl']) + else + to_resolve = string.format('%s.%s.%s', addr:get_user(), addr:get_host(), rule['dnsbl']) + end + task:resolve_dns_a(to_resolve, 'emails_dns_cb', rule['symbol']) + elseif rule['map'] then + if rule['domain_only'] then + if rule['map']:get_key(addr:get_host()) then + task:insert_result(rule['symbol'], 1) + end + else + if rule['map']:get_key(string.format('%s@%s', addr:get_user(), addr:get_host())) then + task:insert_result(rule['symbol'], 1) + end + end + end +end + +-- Check every email address of the task against all rules, once per distinct user@host +function check_emails(task) + local emails = task:get_emails() + local checked = {} + if emails then + for _,addr in ipairs(emails) do + local to_check = string.format('%s@%s', addr:get_user(), addr:get_host()) + if not checked[to_check] then + for _,rule in ipairs(rules) do + check_email_rule(task, rule, addr) + end + checked[to_check] = true + end + end + end +end + +-- Add rule to ruleset +local function add_emails_rule(params) + local newrule = { + name = nil, + dnsbl = nil, + map = nil, + domain_only = false, + symbol = nil + } + for _,param in ipairs(params) do + local _,_,name,value = string.find(param, '([a-zA-Z_0-9]+)%s*=%s*(.+)') + if not name or not value
then + rspamd_logger:err('invalid rule: '..param) + return nil + end + if name == 'dnsbl' then + newrule['dnsbl'] = value + newrule['name'] = value + elseif name == 'map' then + newrule['name'] = value + newrule['map'] = rspamd_config:add_hash_map (newrule['name']) + elseif name == 'symbol' then + newrule['symbol'] = value + elseif name == 'domain_only' then + if value == 'yes' or value == 'true' or value == '1' then + newrule['domain_only'] = true + end + else + rspamd_logger:err('invalid rule option: '.. name) + return nil + end + + end + if not newrule['symbol'] or (not newrule['map'] and not newrule['dnsbl']) then + rspamd_logger:err('incomplete rule') + return nil + end + table.insert(rules, newrule) + return newrule +end + + +-- Registration +if type(rspamd_config.get_api_version) ~= 'nil' then + if rspamd_config:get_api_version() >= 2 then + rspamd_config:register_module_option('emails', 'rule', 'string') + else + rspamd_logger:err('Invalid rspamd version for this plugin') + end +end + +local opts = rspamd_config:get_all_opt('emails') +if opts then + local strrules = opts['rule'] + if strrules then + if type(strrules) == 'table' then + for _,value in ipairs(strrules) do + local params = split(value, ',') + local rule = add_emails_rule (params) + if not rule then + rspamd_logger:err('cannot add rule: "'..value..'"') + else + if type(rspamd_config.get_api_version) ~= 'nil' then + rspamd_config:register_virtual_symbol(rule['symbol'], 1.0) + end + end + end + elseif type(strrules) == 'string' then + local params = split(strrules, ',') + local rule = add_emails_rule (params) + if not rule then + rspamd_logger:err('cannot add rule: "'..strrules..'"') + else + if type(rspamd_config.get_api_version) ~= 'nil' then + rspamd_config:register_virtual_symbol(rule['symbol'], 1.0) + end + end + end + end +end + +if table.maxn(rules) > 0 then + -- add fake symbol to check all maps inside a single callback + if type(rspamd_config.get_api_version) ~= 'nil' then + 
rspamd_config:register_callback_symbol('EMAILS', 1.0, 'check_emails') + else + rspamd_config:register_symbol('EMAILS', 1.0, 'check_emails') + end +end diff --git a/src/plugins/lua/multimap.lua b/src/plugins/lua/multimap.lua index 6986c8c72..9512ff890 100644 --- a/src/plugins/lua/multimap.lua +++ b/src/plugins/lua/multimap.lua @@ -27,7 +27,7 @@ function split(str, delim, maxNb) return result end -function rbl_cb(task, to_resolve, results, err) +function multimap_rbl_cb(task, to_resolve, results, err) if results then local _,_,o4,o3,o2,o1,in_rbl = string.find(to_resolve, '(%d+)%.(%d+)%.(%d+)%.(%d+)%.(.+)') -- Get corresponding rule by rbl name @@ -71,13 +71,13 @@ function check_multimap(task) if ip then local _,_,o1,o2,o3,o4 = string.find(ip, '(%d+)%.(%d+)%.(%d+)%.(%d+)') local rbl_str = o4 .. '.' .. o3 .. '.' .. o2 .. '.' .. o1 .. '.' .. rule['map'] - task:resolve_dns_a(rbl_str, 'rbl_cb') + task:resolve_dns_a(rbl_str, 'multimap_rbl_cb') end end end end -function add_rule(params) +local function add_multimap_rule(params) local newrule = { type = 'ip', header = nil, @@ -143,7 +143,7 @@ if opts then if type(strrules) == 'table' then for _,value in ipairs(strrules) do local params = split(value, ',') - local rule = add_rule (params) + local rule = add_multimap_rule (params) if not rule then rspamd_logger:err('cannot add rule: "'..value..'"') else @@ -154,7 +154,7 @@ if opts then end elseif type(strrules) == 'string' then local params = split(strrules, ',') - local rule = add_rule (params) + local rule = add_multimap_rule (params) if not rule then rspamd_logger:err('cannot add rule: "'..strrules..'"') else diff --git a/src/plugins/lua/trie.lua b/src/plugins/lua/trie.lua index d4bafe943..98248f29f 100644 --- a/src/plugins/lua/trie.lua +++ b/src/plugins/lua/trie.lua @@ -50,7 +50,6 @@ local function add_trie(params) local patterns = split(params[2], ',') local trie = {} trie['trie'] = rspamd_trie:create(true) - print (type(trie['trie'])) for num,pattern in ipairs(patterns) 
do trie['trie']:add_pattern(pattern, num) end @@ -64,7 +63,6 @@ end function check_trie(task) for _,trie in ipairs(tries) do - print (type(trie['trie'])) if trie['trie']:search_task(task) then task:insert_result(trie['symbol'], 1) end diff --git a/src/url.c b/src/url.c index f1b4242a1..596d17d3a 100644 --- a/src/url.c +++ b/src/url.c @@ -41,7 +41,7 @@ (LOWEST_PORT <= (port) && (port) <= HIGHEST_PORT) struct _proto { - guchar *name; + guchar *name; gint port; uintptr_t *unused; guint need_slashes:1; @@ -55,6 +55,7 @@ typedef struct url_match_s { gsize m_len; const gchar *pattern; const gchar *prefix; + gboolean add_prefix; } url_match_t; struct url_matcher { @@ -1111,20 +1112,24 @@ domain: static gboolean url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) { + const gchar *p; /* Check what we have found */ if (pos > begin && *pos == '@') { - if (is_atom (*(pos - 1)) && is_domain (*(pos + 1))) { - match->m_begin = pos + 1; + /* Walk back over the user name; it may start at the very beginning of the text */ + p = pos - 1; + while (p > begin && is_atom (*p)) { + p --; + } + if (is_atom (*p) || p + 1 < pos) { + match->m_begin = is_atom (*p) ? p : p + 1; return TRUE; } } else { - while (pos < end && is_atom (*pos)) { - if (*pos == '@') { - match->m_begin = pos + 1; - return TRUE; - } - pos ++; + p = pos + strlen (match->pattern); + if (p < end && is_atom (*p)) { + match->m_begin = p; + return TRUE; } } return FALSE; @@ -1141,6 +1146,7 @@ url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match p ++; } match->m_len = p - match->m_begin; + match->add_prefix = TRUE; return TRUE; } @@ -1148,7 +1154,7 @@ void url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text_part *part, gboolean is_html) { gint rc, off = 0; - gchar *url_str = NULL; + gchar *url_str = NULL; struct uri *new; const guint8 *p, *end; @@ -1176,8 +1182,13 @@ url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text g_strstrip (url_str); rc = parse_uri (new, url_str, pool); if (rc ==
URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) { - g_tree_insert (is_html ? part->html_urls : part->urls, url_str, new); - task->urls = g_list_prepend (task->urls, new); + if (new->protocol == PROTOCOL_MAILTO) { + task->emails = g_list_prepend (task->emails, new); + } + else { + g_tree_insert (is_html ? part->html_urls : part->urls, url_str, new); + task->urls = g_list_prepend (task->urls, new); + } } else { msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); @@ -1197,7 +1208,7 @@ gboolean url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gint *res, gchar **url_str) { const gchar *end, *pos; - gint idx; + gint idx, l; struct url_matcher *matcher; url_match_t m; @@ -1210,10 +1221,18 @@ url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gint *res, gch matcher = &matchers[idx]; m.pattern = matcher->pattern; m.prefix = matcher->prefix; + m.add_prefix = FALSE; if (matcher->start (begin, end, pos, &m) && matcher->end (begin, end, pos, &m)) { - *url_str = memory_pool_alloc (pool, m.m_len + 1); - memcpy (*url_str, m.m_begin, m.m_len); - (*url_str)[m.m_len] = '\0'; + if (m.add_prefix) { + l = m.m_len + 1 + strlen (m.prefix); + *url_str = memory_pool_alloc (pool, l); + rspamd_snprintf (*url_str, l, "%s%*s", m.prefix, m.m_len, m.m_begin); + } + else { + *url_str = memory_pool_alloc (pool, m.m_len + 1); + memcpy (*url_str, m.m_begin, m.m_len); + (*url_str)[m.m_len] = '\0'; + } } else { diff --git a/src/url.h b/src/url.h index 6b08682ba..eb11ceba3 100644 --- a/src/url.h +++ b/src/url.h @@ -60,7 +60,7 @@ enum uri_errno { URI_ERRNO_NO_HOST_SLASH, /* Slash after host missing */ URI_ERRNO_IPV6_SECURITY, /* IPv6 security bug detected */ URI_ERRNO_INVALID_PORT, /* Port number is bad */ - URI_ERRNO_INVALID_PORT_RANGE, /* Port number is not within 0-65535 */ + URI_ERRNO_INVALID_PORT_RANGE /* Port number is not within 0-65535 */ }; enum protocol { @@ -68,8 +68,8 @@ enum protocol { PROTOCOL_FTP, 
PROTOCOL_HTTP, PROTOCOL_HTTPS, - - PROTOCOL_UNKNOWN, + PROTOCOL_MAILTO, + PROTOCOL_UNKNOWN }; #define struri(uri) ((uri)->string) diff --git a/src/worker.c b/src/worker.c index 160aa6969..1d6ec05fb 100644 --- a/src/worker.c +++ b/src/worker.c @@ -264,6 +264,9 @@ free_task (struct worker_task *task, gboolean is_soft) if (task->urls) { g_list_free (task->urls); } + if (task->emails) { + g_list_free (task->emails); + } if (task->images) { g_list_free (task->images); } -- 2.39.5