diff options
Diffstat (limited to 'src/lua/lua_task.c')
-rw-r--r-- | src/lua/lua_task.c | 315 |
1 files changed, 98 insertions, 217 deletions
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 4f92bdb06..270d5ec06 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -14,6 +14,8 @@ * limitations under the License. */ #include "lua_common.h" +#include "lua_url.h" + #include "message.h" #include "images.h" #include "archives.h" @@ -176,7 +178,7 @@ LUA_FUNCTION_DEF (task, adjust_result); * - module: string * - score: number * - priority: integer - * - flags: flags sring + * - flags: flags string * - result: named result if needed * * @param {rspamd_action or string} action a numeric or string action value @@ -515,10 +517,19 @@ LUA_FUNCTION_DEF (task, has_from); * @method task:get_from([type]) * Return SMTP or MIME sender for a task. This function returns an internet address which one is a table with the following structure: * + * - `raw` - the original value without any processing * - `name` - name of internet address in UTF8, e.g. for `Vsevolod Stakhov <blah@foo.com>` it returns `Vsevolod Stakhov` * - `addr` - address part of the address * - `user` - user part (if present) of the address, e.g. `blah` * - `domain` - domain part (if present), e.g. `foo.com` + * - `flags` - table with following keys set to true if given condition fulfilled: + * - [valid] - valid SMTP address in conformity with https://tools.ietf.org/html/rfc5321#section-4.1. + * - [ip] - domain is IPv4/IPv6 address + * - [braced] - angled `<blah@foo.com>` address + * - [quoted] - quoted user part + * - [empty] - empty address + * - [backslash] - user part contains backslash + * - [8bit] - contains 8bit characters * @param {integer|string} type if specified has the following meaning: `0` or `any` means try SMTP sender and fallback to MIME if failed, `1` or `smtp` means checking merely SMTP sender and `2` or `mime` means MIME `From:` only * @return {address} sender or `nil` */ @@ -632,7 +643,7 @@ LUA_FUNCTION_DEF (task, get_archives); */ LUA_FUNCTION_DEF (task, get_dkim_results); /*** - * @method task:get_symbol(name) + * @method task:get_symbol(name, [shadow_result_name]) * Searches for a symbol `name` in all metrics results and returns a list of tables * one per metric that describes the symbol inserted. Please note that this function * is intended to return values for **inserted** symbols, so if this symbol was not @@ -644,7 +655,7 @@ LUA_FUNCTION_DEF (task, get_dkim_results); * - `options` - a table of strings representing options of a symbol * - `group` - a group of symbol (or 'ungrouped') * @param {string} name symbol's name - * @return {list of tables} list of tables or nil if symbol was not found in any metric + * @return {list of tables} list of tables or nil if symbol was not found */ LUA_FUNCTION_DEF (task, get_symbol); /*** @@ -654,7 +665,7 @@ LUA_FUNCTION_DEF (task, get_symbol); */ LUA_FUNCTION_DEF (task, get_symbols_all); /*** - * @method task:get_symbols() + * @method task:get_symbols([shadow_result_name]) * Returns array of all symbols matched for this task * @return {table, table} table of strings with symbols names + table of theirs scores */ @@ -694,7 +705,7 @@ LUA_FUNCTION_DEF (task, get_symbols_tokens); LUA_FUNCTION_DEF (task, process_ann_tokens); /*** - * @method task:has_symbol(name) + * @method task:has_symbol(name, [shadow_result_name]) * Fast path to check if a specified symbol is in the task's results * @param {string} name symbol's name * @return {boolean} `true` if symbol has been found @@ -2003,9 +2014,11 @@ lua_task_adjust_result (lua_State * L) } if (s) { - metric_res->score -= s->score; - s->score = weight; - metric_res->score += s->score; + if (!isnan (weight)) { + metric_res->score -= s->score; + s->score = weight; + metric_res->score += s->score; + } } else { return luaL_error (L, "symbol not found: %s", symbol_name); @@ -2235,61 +2248,7 @@ lua_task_append_message (lua_State * L) return 0; } -struct lua_tree_cb_data { - lua_State *L; - int i; - gint mask; - gint need_images; - gdouble skip_prob; - guint64 xoroshiro_state[4]; -}; - -static void -lua_tree_url_callback (gpointer key, gpointer value, gpointer ud) -{ - struct rspamd_lua_url *lua_url; - struct rspamd_url *url = (struct rspamd_url *)value; - struct lua_tree_cb_data *cb = ud; - - if (url->protocol & cb->mask) { - if (!cb->need_images && (url->flags & RSPAMD_URL_FLAG_IMAGE)) { - return; - } - - if (cb->skip_prob > 0) { - gdouble coin = rspamd_random_double_fast_seed (cb->xoroshiro_state); - - if (coin < cb->skip_prob) { - return; - } - } - - lua_url = lua_newuserdata (cb->L, sizeof (struct rspamd_lua_url)); - rspamd_lua_setclass (cb->L, "rspamd{url}", -1); - lua_url->url = url; - lua_rawseti (cb->L, -2, cb->i++); - } -} - -static inline gsize -lua_task_urls_adjust_skip_prob (struct rspamd_task *task, - struct lua_tree_cb_data *cb, gsize sz, gsize max_urls) -{ - if (max_urls > 0 && sz > max_urls) { - cb->skip_prob = 1.0 - ((gdouble)max_urls) / (gdouble)sz; - /* - * Use task dependent probabilistic seed to ensure that - * consequent task:get_urls return the same list of urls - */ - memcpy (&cb->xoroshiro_state[0], &task->task_timestamp, - MIN (sizeof (cb->xoroshiro_state[0]), sizeof (task->task_timestamp))); - memcpy (&cb->xoroshiro_state[1], MESSAGE_FIELD (task, digest), - sizeof (cb->xoroshiro_state[1]) * 3); - sz = max_urls; - } - return sz; -} static gint lua_task_get_urls (lua_State * L) @@ -2297,12 +2256,9 @@ lua_task_get_urls (lua_State * L) LUA_TRACE_POINT; struct rspamd_task *task = lua_check_task (L, 1); struct lua_tree_cb_data cb; - gint protocols_mask = 0; - static const gint default_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS| - PROTOCOL_FILE|PROTOCOL_FTP; - const gchar *cache_name = "emails+urls"; struct rspamd_url *u; - gboolean need_images = FALSE; + static const gint default_protocols_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS| + PROTOCOL_FILE|PROTOCOL_FTP; gsize sz, max_urls = 0; if (task) { @@ -2316,135 +2272,26 @@ lua_task_get_urls (lua_State * L) return 1; } - if (lua_gettop (L) >= 2) { - if (lua_type (L, 2) == LUA_TBOOLEAN) { - protocols_mask = default_mask; - if (lua_toboolean (L, 2)) { - protocols_mask |= PROTOCOL_MAILTO; - } - } - else if (lua_type (L, 2) == LUA_TTABLE) { - for (lua_pushnil (L); lua_next (L, 2); lua_pop (L, 1)) { - int nmask; - const gchar *pname = lua_tostring (L, -1); - - nmask = rspamd_url_protocol_from_string (pname); - - if (nmask != PROTOCOL_UNKNOWN) { - protocols_mask |= nmask; - } - else { - msg_info ("bad url protocol: %s", pname); - } - } - } - else if (lua_type (L, 2) == LUA_TSTRING) { - const gchar *plist = lua_tostring (L, 2); - gchar **strvec; - gchar * const *cvec; - - strvec = g_strsplit_set (plist, ",;", -1); - cvec = strvec; - - while (*cvec) { - int nmask; - - nmask = rspamd_url_protocol_from_string (*cvec); - - if (nmask != PROTOCOL_UNKNOWN) { - protocols_mask |= nmask; - } - else { - msg_info ("bad url protocol: %s", *cvec); - } - - cvec ++; - } - - g_strfreev (strvec); - } - else { - protocols_mask = default_mask; - } - - if (lua_type (L, 3) == LUA_TBOOLEAN) { - need_images = lua_toboolean (L, 3); - } - } - else { - protocols_mask = default_mask; + /* Exclude RSPAMD_URL_FLAG_CONTENT to preserve backward compatibility */ + if (!lua_url_cbdata_fill (L, 2, &cb, default_protocols_mask, + (~RSPAMD_URL_FLAG_CONTENT), max_urls)) { + return luaL_error (L, "invalid arguments"); } - memset (&cb, 0, sizeof (cb)); - cb.i = 1; - cb.L = L; - cb.mask = protocols_mask; - cb.need_images = need_images; + sz = kh_size (MESSAGE_FIELD (task, urls)); + sz = lua_url_adjust_skip_prob (task->task_timestamp, + MESSAGE_FIELD (task, digest), &cb, sz); - if (protocols_mask & PROTOCOL_MAILTO) { - if (need_images) { - cache_name = "emails+urls+img"; - } - else { - cache_name = "emails+urls"; - } - - sz = kh_size (MESSAGE_FIELD (task, urls)); - - sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls); + lua_createtable (L, sz, 0); - if (protocols_mask == (default_mask|PROTOCOL_MAILTO)) { - /* Can use cached version */ - if (!lua_task_get_cached (L, task, cache_name)) { - lua_createtable (L, sz, 0); - kh_foreach_key (MESSAGE_FIELD (task, urls), u, { - lua_tree_url_callback (u, u, &cb); - }); - lua_task_set_cached (L, task, cache_name, -1); - } - } - else { - lua_createtable (L, sz, 0); - kh_foreach_key (MESSAGE_FIELD (task, urls), u, { - lua_tree_url_callback (u, u, &cb); - }); - } - - } - else { - if (need_images) { - cache_name = "urls+img"; - } - else { - cache_name = "urls"; - } + kh_foreach_key (MESSAGE_FIELD (task, urls), u, { + lua_tree_url_callback (u, u, &cb); + }); - sz = kh_size (MESSAGE_FIELD (task, urls)); - sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls); - - if (protocols_mask == (default_mask)) { - if (!lua_task_get_cached (L, task, cache_name)) { - lua_createtable (L, sz, 0); - kh_foreach_key (MESSAGE_FIELD (task, urls), u, { - if (!(u->protocol & PROTOCOL_MAILTO)) { - lua_tree_url_callback (u, u, &cb); - } - }); - lua_task_set_cached (L, task, cache_name, -1); - } - } - else { - lua_createtable (L, sz, 0); - kh_foreach_key (MESSAGE_FIELD (task, urls), u, { - if (!(u->protocol & PROTOCOL_MAILTO)) { - lua_tree_url_callback (u, u, &cb); - } - }); - } - } + lua_url_cbdata_dtor (&cb); } else { - return luaL_error (L, "invalid arguments"); + return luaL_error (L, "invalid arguments, no task"); } return 1; @@ -2581,20 +2428,26 @@ lua_task_get_emails (lua_State * L) struct rspamd_task *task = lua_check_task (L, 1); struct lua_tree_cb_data cb; struct rspamd_url *u; + gsize max_urls = 0, sz; if (task) { if (task->message) { - lua_createtable (L, kh_size (MESSAGE_FIELD (task, urls)), 0); - memset (&cb, 0, sizeof (cb)); - cb.i = 1; - cb.L = L; - cb.mask = PROTOCOL_MAILTO; + if (!lua_url_cbdata_fill (L, 2, &cb, PROTOCOL_MAILTO, + (~RSPAMD_URL_FLAG_CONTENT), max_urls)) { + return luaL_error (L, "invalid arguments"); + } + + sz = kh_size (MESSAGE_FIELD (task, urls)); + sz = lua_url_adjust_skip_prob (task->task_timestamp, + MESSAGE_FIELD (task, digest), &cb, sz); + + lua_createtable (L, sz, 0); kh_foreach_key (MESSAGE_FIELD (task, urls), u, { - if ((u->protocol & PROTOCOL_MAILTO)) { - lua_tree_url_callback (u, u, &cb); - } + lua_tree_url_callback (u, u, &cb); }); + + lua_url_cbdata_dtor (&cb); } else { lua_newtable (L); @@ -3300,8 +3153,18 @@ static void lua_push_email_address (lua_State *L, struct rspamd_email_address *addr) { if (addr) { - lua_createtable (L, 0, 4); + lua_createtable (L, 0, 5); + if (addr->raw_len > 0) { + lua_pushstring (L, "raw"); + lua_pushlstring (L, addr->raw, addr->raw_len); + lua_settable (L, -3); + } + else { + lua_pushstring (L, "raw"); + lua_pushstring (L, ""); + lua_settable (L, -3); + } if (addr->addr_len > 0) { lua_pushstring (L, "addr"); lua_pushlstring (L, addr->addr, addr->addr_len); @@ -4398,25 +4261,26 @@ lua_task_get_dkim_results (lua_State *L) static inline gboolean lua_push_symbol_result (lua_State *L, - struct rspamd_task *task, - const gchar *symbol, - struct rspamd_symbol_result *symbol_result, - gboolean add_metric, - gboolean add_name) + struct rspamd_task *task, + const gchar *symbol, + struct rspamd_symbol_result *symbol_result, + struct rspamd_scan_result *metric_res, + gboolean add_metric, + gboolean add_name) { - struct rspamd_scan_result *metric_res; + struct rspamd_symbol_result *s = NULL; struct rspamd_symbol_option *opt; struct rspamd_symbols_group *sym_group; guint i; - gint j = 1, e = 4; + gint j = 1, table_fields_cnt = 4; - if (!symbol_result) { + if (!metric_res) { metric_res = task->result; + } - if (metric_res) { - s = rspamd_task_find_symbol_result (task, symbol, NULL); - } + if (!symbol_result) { + s = rspamd_task_find_symbol_result (task, symbol, metric_res); } else { s = symbol_result; @@ -4424,13 +4288,13 @@ lua_push_symbol_result (lua_State *L, if (s) { if (add_metric) { - e++; + table_fields_cnt++; } if (add_name) { - e++; + table_fields_cnt++; } - lua_createtable (L, 0, e); + lua_createtable (L, 0, table_fields_cnt); if (add_name) { lua_pushstring (L, "name"); @@ -4487,16 +4351,27 @@ lua_task_get_symbol (lua_State *L) struct rspamd_task *task = lua_check_task (L, 1); const gchar *symbol; gboolean found = FALSE; - gint i = 1; symbol = luaL_checkstring (L, 2); if (task && symbol) { + struct rspamd_scan_result *sres = NULL; + + if (lua_isstring (L, 3)) { + sres = rspamd_find_metric_result (task, lua_tostring (L, 3)); + + if (sres == NULL) { + return luaL_error (L, "invalid scan result: %s", + lua_tostring (L, 3)); + } + } + + /* Always push as a table for compatibility :( */ lua_createtable (L, 1, 0); if ((found = lua_push_symbol_result (L, task, symbol, - NULL, TRUE, FALSE))) { - lua_rawseti (L, -2, i++); + NULL, sres, TRUE, FALSE))) { + lua_rawseti (L, -2, 1); } else { /* Pop table */ @@ -4525,7 +4400,13 @@ lua_task_has_symbol (lua_State *L) symbol = luaL_checkstring (L, 2); if (task && symbol) { - found = (rspamd_task_find_symbol_result (task, symbol, NULL) != NULL); + if (lua_isstring (L, 3)) { + found = (rspamd_task_find_symbol_result (task, symbol, + rspamd_find_metric_result (task, lua_tostring (L, 3))) != NULL); + } + else { + found = (rspamd_task_find_symbol_result (task, symbol, NULL) != NULL); + } lua_pushboolean (L, found); } else { @@ -4642,7 +4523,7 @@ lua_task_get_symbols_all (lua_State *L) kh_foreach_value_ptr (mres->symbols, s, { if (!(s->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) { - lua_push_symbol_result (L, task, s->name, s, FALSE, TRUE); + lua_push_symbol_result (L, task, s->name, s, mres, FALSE, TRUE); lua_rawseti (L, -2, i++); } }); |