diff options
27 files changed, 882 insertions, 370 deletions
diff --git a/contrib/hiredis/async.c b/contrib/hiredis/async.c index 851676263..4a2cadf8b 100644 --- a/contrib/hiredis/async.c +++ b/contrib/hiredis/async.c @@ -129,7 +129,6 @@ static redisAsyncContext *redisAsyncInitialize(redisContext *c) { ac->onConnect = NULL; ac->onDisconnect = NULL; - ac->disconnectCbdata = NULL; ac->replies.head = NULL; ac->replies.tail = NULL; @@ -216,10 +215,9 @@ int redisAsyncSetConnectCallback(redisAsyncContext *ac, redisConnectCallback *fn return REDIS_ERR; } -int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn, void *cbdata) { +int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn) { if (ac->onDisconnect == NULL) { ac->onDisconnect = fn; - ac->disconnectCbdata = cbdata; return REDIS_OK; } return REDIS_ERR; @@ -308,10 +306,10 @@ static void __redisAsyncFree(redisAsyncContext *ac) { * this context, the status will always be REDIS_OK. */ if (ac->onDisconnect && (c->flags & REDIS_CONNECTED)) { if (c->flags & REDIS_FREEING) { - ac->onDisconnect(ac,REDIS_OK,ac->disconnectCbdata); + ac->onDisconnect(ac,REDIS_OK); } else { c->flags |= REDIS_FREEING; - ac->onDisconnect(ac,(ac->err == 0) ? REDIS_OK : REDIS_ERR,ac->disconnectCbdata); + ac->onDisconnect(ac,(ac->err == 0) ? 
REDIS_OK : REDIS_ERR); } } diff --git a/contrib/hiredis/async.h b/contrib/hiredis/async.h index 40a5a49c8..f19139c6f 100644 --- a/contrib/hiredis/async.h +++ b/contrib/hiredis/async.h @@ -54,7 +54,7 @@ typedef struct redisCallbackList { } redisCallbackList; /* Connection callback prototypes */ -typedef void (redisDisconnectCallback)(const struct redisAsyncContext*, int status, void *cbdata); +typedef void (redisDisconnectCallback)(const struct redisAsyncContext*, int status); typedef void (redisConnectCallback)(const struct redisAsyncContext*, int status); /* Context for an async connection to Redis */ @@ -85,8 +85,6 @@ typedef struct redisAsyncContext { /* Called when either the connection is terminated due to an error or per * user request. The status is set accordingly (REDIS_OK, REDIS_ERR). */ redisDisconnectCallback *onDisconnect; - /* Hiredis is just brain-damaged here, need to fix it */ - void *disconnectCbdata; /* Called when the first write event was received. */ redisConnectCallback *onConnect; @@ -109,7 +107,7 @@ redisAsyncContext *redisAsyncConnectBindWithReuse(const char *ip, int port, const char *source_addr); redisAsyncContext *redisAsyncConnectUnix(const char *path); int redisAsyncSetConnectCallback(redisAsyncContext *ac, redisConnectCallback *fn); -int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn, void *cbdata); +int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn); void redisAsyncDisconnect(redisAsyncContext *ac); void redisAsyncFree(redisAsyncContext *ac); diff --git a/contrib/libucl/ucl_parser.c b/contrib/libucl/ucl_parser.c index b048f426a..5b5681863 100644 --- a/contrib/libucl/ucl_parser.c +++ b/contrib/libucl/ucl_parser.c @@ -461,12 +461,15 @@ ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr, } if (!found) { if (strict && parser->var_handler != NULL) { - if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, + if (parser->var_handler (p, 
remain, &dst, &dstlen, &need_free, parser->var_data)) { memcpy (d, dst, dstlen); - ret += dstlen; - d += remain; + ret += remain; + d += dstlen; found = true; + if (need_free) { + free (dst); + } } } diff --git a/contrib/replxx/CMakeLists.txt b/contrib/replxx/CMakeLists.txt index 9225fd8f4..749e61208 100644 --- a/contrib/replxx/CMakeLists.txt +++ b/contrib/replxx/CMakeLists.txt @@ -73,5 +73,6 @@ target_include_directories( ) set( TARGETS ${TARGETS} rspamd-replxx ) target_compile_definitions(rspamd-replxx PRIVATE REPLXX_BUILDING_DLL) +target_link_libraries(rspamd-replxx "${RSPAMD_REQUIRED_LIBRARIES}") install( TARGETS ${TARGETS} LIBRARY DESTINATION ${RSPAMD_LIBDIR})
\ No newline at end of file diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua index b410c2a14..fbb7482c1 100644 --- a/lualib/lua_content/pdf.lua +++ b/lualib/lua_content/pdf.lua @@ -1066,7 +1066,7 @@ local function search_urls(task, pdf) elseif k == 'URI' then v = maybe_dereference_object(v, pdf, task) if type(v) == 'string' then - local url = rspamd_url.create(task:get_mempool(), v) + local url = rspamd_url.create(task:get_mempool(), v, {'content'}) if url then lua_util.debugm(N, task, 'found url %s in object %s:%s', diff --git a/lualib/lua_scanners/common.lua b/lualib/lua_scanners/common.lua index f286d963a..5c298b46f 100644 --- a/lualib/lua_scanners/common.lua +++ b/lualib/lua_scanners/common.lua @@ -68,6 +68,7 @@ local function yield_result(task, rule, vname, dyn_weight, is_fail) local symbol local threat_table = {} local threat_info + local flags -- This should be more generic if not is_fail then @@ -112,11 +113,14 @@ local function yield_result(task, rule, vname, dyn_weight, is_fail) if rule.action and is_fail ~= 'fail' and not all_whitelisted then threat_table = table.concat(threat_table, '; ') + if rule.action ~= 'reject' then + flags = 'least' + end task:set_pre_result(rule.action, lua_util.template(rule.message or 'Rejected', { SCANNER = rule.name, VIRUS = threat_table, - }), rule.name, nil, nil, 'least') + }), rule.name, nil, nil, flags) end end diff --git a/lualib/lua_selectors/extractors.lua b/lualib/lua_selectors/extractors.lua index 625af435c..6e023c780 100644 --- a/lualib/lua_selectors/extractors.lua +++ b/lualib/lua_selectors/extractors.lua @@ -388,6 +388,58 @@ The first argument must be header name.]], ]], ['args_schema'] = { ts.one_of { 'stem', 'raw', 'norm', 'full' }:is_optional()}, }, + -- Get queue ID + ['queueid'] = { + ['get_value'] = function(task) + local queueid = task:get_queue_id() + if queueid then return queueid,'string' end + return nil + end, + ['description'] = [[Get queue ID]], + }, + -- Get ID of the task being 
processed + ['uid'] = { + ['get_value'] = function(task) + local uid = task:get_uid() + if uid then return uid,'string' end + return nil + end, + ['description'] = [[Get ID of the task being processed]], + }, + -- Get message ID of the task being processed + ['messageid'] = { + ['get_value'] = function(task) + local mid = task:get_message_id() + if mid then return mid,'string' end + return nil + end, + ['description'] = [[Get message ID]], + }, + -- Get specific symbol + ['symbol'] = { + ['get_value'] = function(task, args) + local symbol = task:get_symbol(args[1], args[2]) + if symbol then + return symbol[1],'table' + end + end, + ['description'] = 'Get specific symbol. The first argument must be the symbol name. ' .. + 'The second argument is an optional shadow result name. ' .. + 'Returns the symbol table. See task:get_symbol()', + ['args_schema'] = {ts.string, ts.string:is_optional()} + }, + -- Get full scan result + ['scan_result'] = { + ['get_value'] = function(task, args) + local res = task:get_metric_result(args[1]) + if res then + return res,'table' + end + end, + ['description'] = 'Get full scan result (either default or shadow if shadow result name is specified)' .. + 'Returns the result table. See task:get_metric_result()', + ['args_schema'] = {ts.string:is_optional()} + }, } -return extractors
\ No newline at end of file +return extractors diff --git a/lualib/lua_selectors/transforms.lua b/lualib/lua_selectors/transforms.lua index 5f1a4dca0..85a445f67 100644 --- a/lualib/lua_selectors/transforms.lua +++ b/lualib/lua_selectors/transforms.lua @@ -413,12 +413,34 @@ Empty string comes the first argument or 'true', non-empty string comes nil]], end end, ['description'] = 'Applies mask to IP address.' .. - ' The first argument is the mask for IPv4 addresses, the second is the mask for IPv6 addresses.', + ' The first argument is the mask for IPv4 addresses, the second is the mask for IPv6 addresses.', ['args_schema'] = {(ts.number + ts.string / tonumber), (ts.number + ts.string / tonumber):is_optional()} }, + -- Returns the string(s) with all non ascii chars replaced + ['to_ascii'] = { + ['types'] = { + ['string'] = true, + ['list'] = true, + }, + ['map_type'] = 'string', + ['process'] = function(inp, _, args) + if type(inp) == 'table' then + return fun.map( + function(s) + return string.gsub(tostring(s), '[\128-\255]', args[1] or '?') + end, inp), 'string_list' + else + return string.gsub(tostring(inp), '[\128-\255]', '?'), 'string' + end + end, + ['description'] = 'Returns the string with all non-ascii bytes replaced with the character ' .. + 'given as second argument or `?`', + ['args_schema'] = {ts.string:is_optional()} + }, + } transform_function.match = transform_function.regexp -return transform_function
\ No newline at end of file +return transform_function diff --git a/src/controller.c b/src/controller.c index 4ed3f187c..1f8524b84 100644 --- a/src/controller.c +++ b/src/controller.c @@ -2863,7 +2863,8 @@ rspamd_controller_handle_plugins (struct rspamd_http_connection_entry *conn_ent, } g_assert (npath != NULL); - ucl_array_append (npath, ucl_object_fromstring (k)); + rspamd_ftok_t *key_tok = (rspamd_ftok_t *)k; + ucl_array_append (npath, ucl_object_fromlstring (key_tok->begin, key_tok->len)); } rspamd_controller_send_ucl (conn_ent, plugins); @@ -2959,14 +2960,31 @@ rspamd_controller_handle_lua_plugin (struct rspamd_http_connection_entry *conn_e struct rspamd_http_connection_entry **pconn; struct rspamd_controller_worker_ctx *ctx; lua_State *L; - gchar *url_str; + struct http_parser_url u; + rspamd_ftok_t lookup; - url_str = rspamd_fstring_cstr (msg->url); - cbd = g_hash_table_lookup (session->ctx->plugins, url_str); - g_free (url_str); + + http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u); + + if (u.field_set & (1 << UF_PATH)) { + guint unnorm_len; + lookup.begin = msg->url->str + u.field_data[UF_PATH].off; + lookup.len = u.field_data[UF_PATH].len; + + rspamd_http_normalize_path_inplace ((gchar *)lookup.begin, + lookup.len, + &unnorm_len); + lookup.len = unnorm_len; + } + else { + lookup.begin = msg->url->str; + lookup.len = msg->url->len; + } + + cbd = g_hash_table_lookup (session->ctx->plugins, &lookup); if (cbd == NULL || cbd->handler == NULL) { - msg_err_session ("plugin handler %V has not been found", msg->url); + msg_err_session ("plugin handler %T has not been found", &lookup); rspamd_controller_send_error (conn_ent, 404, "No command associated"); return 0; } @@ -3011,15 +3029,38 @@ rspamd_controller_handle_lua_plugin (struct rspamd_http_connection_entry *conn_e /* Callback */ lua_rawgeti (L, LUA_REGISTRYINDEX, cbd->handler->idx); + /* Task */ ptask = lua_newuserdata (L, sizeof (*ptask)); rspamd_lua_setclass (L, "rspamd{task}", -1); *ptask = 
task; + /* Connection */ pconn = lua_newuserdata (L, sizeof (*pconn)); rspamd_lua_setclass (L, "rspamd{csession}", -1); *pconn = conn_ent; - if (lua_pcall (L, 2, 0, 0) != 0) { + /* Query arguments */ + GHashTable *params; + GHashTableIter it; + gpointer k, v; + + params = rspamd_http_message_parse_query (msg); + lua_createtable (L, g_hash_table_size (params), 0); + g_hash_table_iter_init (&it, params); + + while (g_hash_table_iter_next (&it, &k, &v)) { + rspamd_ftok_t *key_tok = (rspamd_ftok_t *)k, + *value_tok = (rspamd_ftok_t *)v; + + lua_pushlstring (L, key_tok->begin, key_tok->len); + /* TODO: consider rspamd_text here */ + lua_pushlstring (L, value_tok->begin, value_tok->len); + lua_settable (L, -3); + } + + g_hash_table_unref (params); + + if (lua_pcall (L, 3, 0, 0) != 0) { rspamd_controller_send_error (conn_ent, 503, "Cannot run callback: %s", lua_tostring (L, -1)); lua_settop (L, 0); @@ -3391,7 +3432,7 @@ rspamd_controller_register_plugin_path (lua_State *L, { struct rspamd_controller_plugin_cbdata *cbd; const ucl_object_t *elt; - GString *full_path; + rspamd_fstring_t *full_path; cbd = g_malloc0 (sizeof (*cbd)); cbd->L = L; @@ -3418,15 +3459,18 @@ rspamd_controller_register_plugin_path (lua_State *L, cbd->need_task = TRUE; } - full_path = g_string_new ("/plugins/"); - rspamd_printf_gstring (full_path, "%s/%s", - plugin_name, path); + full_path = rspamd_fstring_new_init ("/plugins/", sizeof ("/plugins/") - 1); + /* Zero terminated */ + rspamd_printf_fstring (&full_path, "%s/%s%c", + plugin_name, path, '\0'); rspamd_http_router_add_path (ctx->http, full_path->str, rspamd_controller_handle_lua_plugin); - g_hash_table_insert (ctx->plugins, full_path->str, cbd); - g_string_free (full_path, FALSE); /* Do not free data */ + rspamd_ftok_t *key_tok = rspamd_ftok_map (full_path); + /* Truncate stupid \0 symbol to enable lookup */ + key_tok->len --; + g_hash_table_insert (ctx->plugins, key_tok, cbd); } static void @@ -3499,9 +3543,9 @@ start_controller_worker (struct 
rspamd_worker *worker) ctx->srv = worker->srv; ctx->custom_commands = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); - ctx->plugins = g_hash_table_new_full (rspamd_strcase_hash, - rspamd_strcase_equal, g_free, - rspamd_plugin_cbdata_dtor); + ctx->plugins = g_hash_table_new_full (rspamd_ftok_icase_hash, + rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free, + rspamd_plugin_cbdata_dtor); if (isnan (ctx->task_timeout)) { if (isnan (ctx->cfg->task_timeout)) { @@ -3642,6 +3686,10 @@ start_controller_worker (struct rspamd_worker *worker) "Access-Control-Allow-Origin", "*"); } + /* Disable all results caching, see #3330 */ + rspamd_http_router_add_header (ctx->http, + "Cache-Control", "no-store"); + rspamd_http_router_set_unknown_handler (ctx->http, rspamd_controller_handle_unknown); diff --git a/src/libserver/redis_pool.c b/src/libserver/redis_pool.c index 4d1a18676..7e835013c 100644 --- a/src/libserver/redis_pool.c +++ b/src/libserver/redis_pool.c @@ -258,10 +258,9 @@ rspamd_redis_pool_schedule_timeout (struct rspamd_redis_pool_connection *conn) } static void -rspamd_redis_pool_on_disconnect (const struct redisAsyncContext *ac, int status, - void *ud) +rspamd_redis_pool_on_disconnect (const struct redisAsyncContext *ac, int status) { - struct rspamd_redis_pool_connection *conn = ud; + struct rspamd_redis_pool_connection *conn = ac->data; /* * Here, we know that redis itself will free this connection @@ -313,13 +312,13 @@ rspamd_redis_pool_new_connection (struct rspamd_redis_pool *pool, g_hash_table_insert (elt->pool->elts_by_ctx, ctx, conn); g_queue_push_head_link (elt->active, conn->entry); conn->ctx = ctx; + ctx->data = conn; rspamd_random_hex (conn->tag, sizeof (conn->tag)); REF_INIT_RETAIN (conn, rspamd_redis_pool_conn_dtor); msg_debug_rpool ("created new connection to %s:%d: %p", ip, port, ctx); redisLibevAttach (pool->event_loop, ctx); - redisAsyncSetDisconnectCallback (ctx, rspamd_redis_pool_on_disconnect, - conn); + 
redisAsyncSetDisconnectCallback (ctx, rspamd_redis_pool_on_disconnect); if (password) { redisAsyncCommand (ctx, NULL, NULL, diff --git a/src/libserver/url.c b/src/libserver/url.c index db89073f5..195727c13 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -214,6 +214,35 @@ struct url_matcher static_matchers[] = { 0} }; +struct rspamd_url_flag_name { + const gchar *name; + gint flag; + gint hash; +} url_flag_names[] = { + {"phished", RSPAMD_URL_FLAG_PHISHED, -1}, + {"numeric", RSPAMD_URL_FLAG_NUMERIC, -1}, + {"obscured", RSPAMD_URL_FLAG_OBSCURED, -1}, + {"redirected", RSPAMD_URL_FLAG_REDIRECTED, -1}, + {"html_displayed", RSPAMD_URL_FLAG_HTML_DISPLAYED, -1}, + {"text", RSPAMD_URL_FLAG_FROM_TEXT, -1}, + {"subject", RSPAMD_URL_FLAG_SUBJECT, -1}, + {"host_encoded", RSPAMD_URL_FLAG_HOSTENCODED, -1}, + {"schema_encoded", RSPAMD_URL_FLAG_SCHEMAENCODED, -1}, + {"path_encoded", RSPAMD_URL_FLAG_PATHENCODED, -1}, + {"query_encoded", RSPAMD_URL_FLAG_QUERYENCODED, -1}, + {"missing_slahes", RSPAMD_URL_FLAG_MISSINGSLASHES, -1}, + {"idn", RSPAMD_URL_FLAG_IDN, -1}, + {"has_port", RSPAMD_URL_FLAG_HAS_PORT, -1}, + {"has_user", RSPAMD_URL_FLAG_HAS_USER, -1}, + {"schemaless", RSPAMD_URL_FLAG_SCHEMALESS, -1}, + {"unnormalised", RSPAMD_URL_FLAG_UNNORMALISED, -1}, + {"zw_spaces", RSPAMD_URL_FLAG_ZW_SPACES, -1}, + {"url_displayed", RSPAMD_URL_FLAG_DISPLAY_URL, -1}, + {"image", RSPAMD_URL_FLAG_IMAGE, -1}, + {"query", RSPAMD_URL_FLAG_QUERY, -1}, + {"content", RSPAMD_URL_FLAG_CONTENT, -1} +}; + static inline khint_t rspamd_url_hash (struct rspamd_url *u); @@ -610,6 +639,26 @@ rspamd_url_init (const gchar *tld_file) url_scanner->matchers_strict->len); } } + + /* Generate hashes for flags */ + for (gint i = 0; i < G_N_ELEMENTS (url_flag_names); i ++) { + url_flag_names[i].hash = + rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT, + url_flag_names[i].name, + strlen (url_flag_names[i].name), 0); + } + /* Ensure that we have no hashes collisions O(N^2) but this 
array is small */ + for (gint i = 0; i < G_N_ELEMENTS (url_flag_names) - 1; i ++) { + for (gint j = i + 1; j < G_N_ELEMENTS (url_flag_names); j ++) { + if (url_flag_names[i].hash == url_flag_names[j].hash) { + msg_err ("collision: both %s and %s map to %d", + url_flag_names[i].name, url_flag_names[j].name, + url_flag_names[i].hash); + abort (); + } + } + } + } #define SET_U(u, field) do { \ @@ -3991,3 +4040,33 @@ rspamd_url_host_set_has (khash_t (rspamd_url_host_hash) *set, struct rspamd_url return false; } + +bool +rspamd_url_flag_from_string (const gchar *str, gint *flag) +{ + gint h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT, + str, strlen (str), 0); + + for (int i = 0; i < G_N_ELEMENTS (url_flag_names); i ++) { + if (url_flag_names[i].hash == h) { + *flag |= url_flag_names[i].flag; + + return true; + } + } + + return false; +} + + +const gchar * +rspamd_url_flag_to_string (int flag) +{ + for (int i = 0; i < G_N_ELEMENTS (url_flag_names); i ++) { + if (url_flag_names[i].flag & flag) { + return url_flag_names[i].name; + } + } + + return NULL; +} diff --git a/src/libserver/url.h b/src/libserver/url.h index bb9c57399..2a5892fc5 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -36,6 +36,7 @@ enum rspamd_url_flags { RSPAMD_URL_FLAG_DISPLAY_URL = 1u << 18u, RSPAMD_URL_FLAG_IMAGE = 1u << 19u, RSPAMD_URL_FLAG_QUERY = 1u << 20u, + RSPAMD_URL_FLAG_CONTENT = 1u << 21u, }; struct rspamd_url_tag { @@ -268,6 +269,21 @@ const gchar *rspamd_url_protocol_name (enum rspamd_url_protocol proto); */ enum rspamd_url_protocol rspamd_url_protocol_from_string (const gchar *str); +/** + * Converts string to a url flag + * @param str + * @param flag + * @return + */ +bool rspamd_url_flag_from_string (const gchar *str, gint *flag); + +/** + * Converts url flag to a string + * @param flag + * @return + */ +const gchar * rspamd_url_flag_to_string (int flag); + /* Defines sets of urls indexed by url as is */ KHASH_DECLARE (rspamd_url_hash, struct 
rspamd_url *, char); KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char); diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index 2be91140a..d53ebbc48 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -198,6 +198,18 @@ rspamd_lua_setclass (lua_State * L, const gchar *classname, gint objidx) } void +rspamd_lua_class_metatable (lua_State *L, const gchar *classname) +{ + khiter_t k; + + k = kh_get (lua_class_set, lua_classes, classname); + + g_assert (k != kh_end (lua_classes)); + lua_rawgetp (L, LUA_REGISTRYINDEX, + RSPAMD_LIGHTUSERDATA_MASK (kh_key (lua_classes, k))); +} + +void rspamd_lua_add_metamethod (lua_State *L, const gchar *classname, luaL_Reg *meth) { @@ -2527,4 +2539,16 @@ rspamd_lua_universal_pcall (lua_State *L, gint cbref, const gchar* strloc, va_end (ap); return true; -}
\ No newline at end of file +} + +#if defined( LUA_VERSION_NUM ) && LUA_VERSION_NUM <= 502 +gint +rspamd_lua_geti (lua_State *L, int pos, int i) +{ + pos = lua_absindex (L, pos); + lua_pushinteger (L, i); + lua_gettable (L, pos); + + return lua_type (L, -1); +} +#endif
\ No newline at end of file diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h index 5edec663b..f9ec8e2e3 100644 --- a/src/lua/lua_common.h +++ b/src/lua/lua_common.h @@ -63,11 +63,9 @@ static inline void lua_rawsetp (lua_State *L, int i, const void *p) { #endif /* Interface definitions */ -#define LUA_FUNCTION_DEF(class, name) static int lua_ ## class ## _ ## name ( \ - lua_State * L) -#define LUA_PUBLIC_FUNCTION_DEF(class, name) int lua_ ## class ## _ ## name ( \ - lua_State * L) -#define LUA_INTERFACE_DEF(class, name) { # name, lua_ ## class ## _ ## name } +#define LUA_FUNCTION_DEF(class, name) static int lua_##class##_##name (lua_State * L) +#define LUA_PUBLIC_FUNCTION_DEF(class, name) int lua_##class##_##name (lua_State * L) +#define LUA_INTERFACE_DEF(class, name) { #name, lua_##class##_##name } #ifdef __cplusplus extern "C" { @@ -162,6 +160,13 @@ void rspamd_lua_new_class (lua_State *L, void rspamd_lua_setclass (lua_State *L, const gchar *classname, gint objidx); /** + * Pushes the metatable for specific class on top of the stack + * @param L + * @param classname + */ +void rspamd_lua_class_metatable (lua_State *L, const gchar *classname); + +/** * Adds a new field to the class (metatable) identified by `classname` * @param L * @param classname @@ -600,6 +605,19 @@ gchar *rspamd_lua_get_module_name (lua_State *L); bool rspamd_lua_universal_pcall (lua_State *L, gint cbref, const gchar* strloc, gint nret, const gchar *args, GError **err, ...); +/** + * Wrapper for lua_geti from lua 5.3 + * @param L + * @param index + * @param i + * @return + */ +#if defined( LUA_VERSION_NUM ) && LUA_VERSION_NUM <= 502 +gint rspamd_lua_geti (lua_State *L, int index, int i); +#else +#define rspamd_lua_geti lua_geti +#endif + /* Paths defs */ #define RSPAMD_CONFDIR_INDEX "CONFDIR" #define RSPAMD_LOCAL_CONFDIR_INDEX "LOCAL_CONFDIR" diff --git a/src/lua/lua_ip.c b/src/lua/lua_ip.c index 2604aa100..dc51a83e9 100644 --- a/src/lua/lua_ip.c +++ b/src/lua/lua_ip.c @@ -165,6 +165,13 
@@ LUA_FUNCTION_DEF (ip, get_port); */ LUA_FUNCTION_DEF (ip, is_local); +/*** + * @method ip:less_than(other) + * Returns true if address is less than other + * @return {boolean} + */ +LUA_FUNCTION_DEF (ip, less_than); + static const struct luaL_reg iplib_m[] = { LUA_INTERFACE_DEF (ip, to_string), LUA_INTERFACE_DEF (ip, to_table), @@ -183,6 +190,7 @@ static const struct luaL_reg iplib_m[] = { {"__tostring", lua_ip_to_string}, {"__eq", lua_ip_equal}, {"__gc", lua_ip_destroy}, + {"__lt", lua_ip_less_than}, {NULL, NULL} }; @@ -557,6 +565,24 @@ lua_ip_is_local (lua_State *L) return 1; } +static gint +lua_ip_less_than (lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_ip *ip = lua_check_ip (L, 1), + *other = lua_check_ip (L, 2); + + if (ip && other) { + lua_pushboolean (L, + rspamd_inet_address_compare (ip->addr, other->addr, true) < 0); + } + else { + lua_pushnil (L); + } + + return 1; +} + void rspamd_lua_ip_push (lua_State *L, rspamd_inet_addr_t *addr) { diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 4f92bdb06..270d5ec06 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -14,6 +14,8 @@ * limitations under the License. */ #include "lua_common.h" +#include "lua_url.h" + #include "message.h" #include "images.h" #include "archives.h" @@ -176,7 +178,7 @@ LUA_FUNCTION_DEF (task, adjust_result); * - module: string * - score: number * - priority: integer - * - flags: flags sring + * - flags: flags string * - result: named result if needed * * @param {rspamd_action or string} action a numeric or string action value @@ -515,10 +517,19 @@ LUA_FUNCTION_DEF (task, has_from); * @method task:get_from([type]) * Return SMTP or MIME sender for a task. This function returns an internet address which one is a table with the following structure: * + * - `raw` - the original value without any processing * - `name` - name of internet address in UTF8, e.g. 
for `Vsevolod Stakhov <blah@foo.com>` it returns `Vsevolod Stakhov` * - `addr` - address part of the address * - `user` - user part (if present) of the address, e.g. `blah` * - `domain` - domain part (if present), e.g. `foo.com` + * - `flags` - table with following keys set to true if given condition fulfilled: + * - [valid] - valid SMTP address in conformity with https://tools.ietf.org/html/rfc5321#section-4.1. + * - [ip] - domain is IPv4/IPv6 address + * - [braced] - angled `<blah@foo.com>` address + * - [quoted] - quoted user part + * - [empty] - empty address + * - [backslash] - user part contains backslash + * - [8bit] - contains 8bit characters * @param {integer|string} type if specified has the following meaning: `0` or `any` means try SMTP sender and fallback to MIME if failed, `1` or `smtp` means checking merely SMTP sender and `2` or `mime` means MIME `From:` only * @return {address} sender or `nil` */ @@ -632,7 +643,7 @@ LUA_FUNCTION_DEF (task, get_archives); */ LUA_FUNCTION_DEF (task, get_dkim_results); /*** - * @method task:get_symbol(name) + * @method task:get_symbol(name, [shadow_result_name]) * Searches for a symbol `name` in all metrics results and returns a list of tables * one per metric that describes the symbol inserted. 
Please note that this function * is intended to return values for **inserted** symbols, so if this symbol was not @@ -644,7 +655,7 @@ LUA_FUNCTION_DEF (task, get_dkim_results); * - `options` - a table of strings representing options of a symbol * - `group` - a group of symbol (or 'ungrouped') * @param {string} name symbol's name - * @return {list of tables} list of tables or nil if symbol was not found in any metric + * @return {list of tables} list of tables or nil if symbol was not found */ LUA_FUNCTION_DEF (task, get_symbol); /*** @@ -654,7 +665,7 @@ LUA_FUNCTION_DEF (task, get_symbol); */ LUA_FUNCTION_DEF (task, get_symbols_all); /*** - * @method task:get_symbols() + * @method task:get_symbols([shadow_result_name]) * Returns array of all symbols matched for this task * @return {table, table} table of strings with symbols names + table of theirs scores */ @@ -694,7 +705,7 @@ LUA_FUNCTION_DEF (task, get_symbols_tokens); LUA_FUNCTION_DEF (task, process_ann_tokens); /*** - * @method task:has_symbol(name) + * @method task:has_symbol(name, [shadow_result_name]) * Fast path to check if a specified symbol is in the task's results * @param {string} name symbol's name * @return {boolean} `true` if symbol has been found @@ -2003,9 +2014,11 @@ lua_task_adjust_result (lua_State * L) } if (s) { - metric_res->score -= s->score; - s->score = weight; - metric_res->score += s->score; + if (!isnan (weight)) { + metric_res->score -= s->score; + s->score = weight; + metric_res->score += s->score; + } } else { return luaL_error (L, "symbol not found: %s", symbol_name); @@ -2235,61 +2248,7 @@ lua_task_append_message (lua_State * L) return 0; } -struct lua_tree_cb_data { - lua_State *L; - int i; - gint mask; - gint need_images; - gdouble skip_prob; - guint64 xoroshiro_state[4]; -}; - -static void -lua_tree_url_callback (gpointer key, gpointer value, gpointer ud) -{ - struct rspamd_lua_url *lua_url; - struct rspamd_url *url = (struct rspamd_url *)value; - struct lua_tree_cb_data *cb = 
ud; - - if (url->protocol & cb->mask) { - if (!cb->need_images && (url->flags & RSPAMD_URL_FLAG_IMAGE)) { - return; - } - - if (cb->skip_prob > 0) { - gdouble coin = rspamd_random_double_fast_seed (cb->xoroshiro_state); - - if (coin < cb->skip_prob) { - return; - } - } - - lua_url = lua_newuserdata (cb->L, sizeof (struct rspamd_lua_url)); - rspamd_lua_setclass (cb->L, "rspamd{url}", -1); - lua_url->url = url; - lua_rawseti (cb->L, -2, cb->i++); - } -} - -static inline gsize -lua_task_urls_adjust_skip_prob (struct rspamd_task *task, - struct lua_tree_cb_data *cb, gsize sz, gsize max_urls) -{ - if (max_urls > 0 && sz > max_urls) { - cb->skip_prob = 1.0 - ((gdouble)max_urls) / (gdouble)sz; - /* - * Use task dependent probabilistic seed to ensure that - * consequent task:get_urls return the same list of urls - */ - memcpy (&cb->xoroshiro_state[0], &task->task_timestamp, - MIN (sizeof (cb->xoroshiro_state[0]), sizeof (task->task_timestamp))); - memcpy (&cb->xoroshiro_state[1], MESSAGE_FIELD (task, digest), - sizeof (cb->xoroshiro_state[1]) * 3); - sz = max_urls; - } - return sz; -} static gint lua_task_get_urls (lua_State * L) @@ -2297,12 +2256,9 @@ lua_task_get_urls (lua_State * L) LUA_TRACE_POINT; struct rspamd_task *task = lua_check_task (L, 1); struct lua_tree_cb_data cb; - gint protocols_mask = 0; - static const gint default_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS| - PROTOCOL_FILE|PROTOCOL_FTP; - const gchar *cache_name = "emails+urls"; struct rspamd_url *u; - gboolean need_images = FALSE; + static const gint default_protocols_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS| + PROTOCOL_FILE|PROTOCOL_FTP; gsize sz, max_urls = 0; if (task) { @@ -2316,135 +2272,26 @@ lua_task_get_urls (lua_State * L) return 1; } - if (lua_gettop (L) >= 2) { - if (lua_type (L, 2) == LUA_TBOOLEAN) { - protocols_mask = default_mask; - if (lua_toboolean (L, 2)) { - protocols_mask |= PROTOCOL_MAILTO; - } - } - else if (lua_type (L, 2) == LUA_TTABLE) { - for (lua_pushnil (L); lua_next (L, 2); lua_pop (L, 
1)) { - int nmask; - const gchar *pname = lua_tostring (L, -1); - - nmask = rspamd_url_protocol_from_string (pname); - - if (nmask != PROTOCOL_UNKNOWN) { - protocols_mask |= nmask; - } - else { - msg_info ("bad url protocol: %s", pname); - } - } - } - else if (lua_type (L, 2) == LUA_TSTRING) { - const gchar *plist = lua_tostring (L, 2); - gchar **strvec; - gchar * const *cvec; - - strvec = g_strsplit_set (plist, ",;", -1); - cvec = strvec; - - while (*cvec) { - int nmask; - - nmask = rspamd_url_protocol_from_string (*cvec); - - if (nmask != PROTOCOL_UNKNOWN) { - protocols_mask |= nmask; - } - else { - msg_info ("bad url protocol: %s", *cvec); - } - - cvec ++; - } - - g_strfreev (strvec); - } - else { - protocols_mask = default_mask; - } - - if (lua_type (L, 3) == LUA_TBOOLEAN) { - need_images = lua_toboolean (L, 3); - } - } - else { - protocols_mask = default_mask; + /* Exclude RSPAMD_URL_FLAG_CONTENT to preserve backward compatibility */ + if (!lua_url_cbdata_fill (L, 2, &cb, default_protocols_mask, + (~RSPAMD_URL_FLAG_CONTENT), max_urls)) { + return luaL_error (L, "invalid arguments"); } - memset (&cb, 0, sizeof (cb)); - cb.i = 1; - cb.L = L; - cb.mask = protocols_mask; - cb.need_images = need_images; + sz = kh_size (MESSAGE_FIELD (task, urls)); + sz = lua_url_adjust_skip_prob (task->task_timestamp, + MESSAGE_FIELD (task, digest), &cb, sz); - if (protocols_mask & PROTOCOL_MAILTO) { - if (need_images) { - cache_name = "emails+urls+img"; - } - else { - cache_name = "emails+urls"; - } - - sz = kh_size (MESSAGE_FIELD (task, urls)); - - sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls); + lua_createtable (L, sz, 0); - if (protocols_mask == (default_mask|PROTOCOL_MAILTO)) { - /* Can use cached version */ - if (!lua_task_get_cached (L, task, cache_name)) { - lua_createtable (L, sz, 0); - kh_foreach_key (MESSAGE_FIELD (task, urls), u, { - lua_tree_url_callback (u, u, &cb); - }); - lua_task_set_cached (L, task, cache_name, -1); - } - } - else { - 
lua_createtable (L, sz, 0); - kh_foreach_key (MESSAGE_FIELD (task, urls), u, { - lua_tree_url_callback (u, u, &cb); - }); - } - - } - else { - if (need_images) { - cache_name = "urls+img"; - } - else { - cache_name = "urls"; - } + kh_foreach_key (MESSAGE_FIELD (task, urls), u, { + lua_tree_url_callback (u, u, &cb); + }); - sz = kh_size (MESSAGE_FIELD (task, urls)); - sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls); - - if (protocols_mask == (default_mask)) { - if (!lua_task_get_cached (L, task, cache_name)) { - lua_createtable (L, sz, 0); - kh_foreach_key (MESSAGE_FIELD (task, urls), u, { - if (!(u->protocol & PROTOCOL_MAILTO)) { - lua_tree_url_callback (u, u, &cb); - } - }); - lua_task_set_cached (L, task, cache_name, -1); - } - } - else { - lua_createtable (L, sz, 0); - kh_foreach_key (MESSAGE_FIELD (task, urls), u, { - if (!(u->protocol & PROTOCOL_MAILTO)) { - lua_tree_url_callback (u, u, &cb); - } - }); - } - } + lua_url_cbdata_dtor (&cb); } else { - return luaL_error (L, "invalid arguments"); + return luaL_error (L, "invalid arguments, no task"); } return 1; @@ -2581,20 +2428,26 @@ lua_task_get_emails (lua_State * L) struct rspamd_task *task = lua_check_task (L, 1); struct lua_tree_cb_data cb; struct rspamd_url *u; + gsize max_urls = 0, sz; if (task) { if (task->message) { - lua_createtable (L, kh_size (MESSAGE_FIELD (task, urls)), 0); - memset (&cb, 0, sizeof (cb)); - cb.i = 1; - cb.L = L; - cb.mask = PROTOCOL_MAILTO; + if (!lua_url_cbdata_fill (L, 2, &cb, PROTOCOL_MAILTO, + (~RSPAMD_URL_FLAG_CONTENT), max_urls)) { + return luaL_error (L, "invalid arguments"); + } + + sz = kh_size (MESSAGE_FIELD (task, urls)); + sz = lua_url_adjust_skip_prob (task->task_timestamp, + MESSAGE_FIELD (task, digest), &cb, sz); + + lua_createtable (L, sz, 0); kh_foreach_key (MESSAGE_FIELD (task, urls), u, { - if ((u->protocol & PROTOCOL_MAILTO)) { - lua_tree_url_callback (u, u, &cb); - } + lua_tree_url_callback (u, u, &cb); }); + + lua_url_cbdata_dtor (&cb); } else { 
lua_newtable (L); @@ -3300,8 +3153,18 @@ static void lua_push_email_address (lua_State *L, struct rspamd_email_address *addr) { if (addr) { - lua_createtable (L, 0, 4); + lua_createtable (L, 0, 5); + if (addr->raw_len > 0) { + lua_pushstring (L, "raw"); + lua_pushlstring (L, addr->raw, addr->raw_len); + lua_settable (L, -3); + } + else { + lua_pushstring (L, "raw"); + lua_pushstring (L, ""); + lua_settable (L, -3); + } if (addr->addr_len > 0) { lua_pushstring (L, "addr"); lua_pushlstring (L, addr->addr, addr->addr_len); @@ -4398,25 +4261,26 @@ lua_task_get_dkim_results (lua_State *L) static inline gboolean lua_push_symbol_result (lua_State *L, - struct rspamd_task *task, - const gchar *symbol, - struct rspamd_symbol_result *symbol_result, - gboolean add_metric, - gboolean add_name) + struct rspamd_task *task, + const gchar *symbol, + struct rspamd_symbol_result *symbol_result, + struct rspamd_scan_result *metric_res, + gboolean add_metric, + gboolean add_name) { - struct rspamd_scan_result *metric_res; + struct rspamd_symbol_result *s = NULL; struct rspamd_symbol_option *opt; struct rspamd_symbols_group *sym_group; guint i; - gint j = 1, e = 4; + gint j = 1, table_fields_cnt = 4; - if (!symbol_result) { + if (!metric_res) { metric_res = task->result; + } - if (metric_res) { - s = rspamd_task_find_symbol_result (task, symbol, NULL); - } + if (!symbol_result) { + s = rspamd_task_find_symbol_result (task, symbol, metric_res); } else { s = symbol_result; @@ -4424,13 +4288,13 @@ lua_push_symbol_result (lua_State *L, if (s) { if (add_metric) { - e++; + table_fields_cnt++; } if (add_name) { - e++; + table_fields_cnt++; } - lua_createtable (L, 0, e); + lua_createtable (L, 0, table_fields_cnt); if (add_name) { lua_pushstring (L, "name"); @@ -4487,16 +4351,27 @@ lua_task_get_symbol (lua_State *L) struct rspamd_task *task = lua_check_task (L, 1); const gchar *symbol; gboolean found = FALSE; - gint i = 1; symbol = luaL_checkstring (L, 2); if (task && symbol) { + struct 
rspamd_scan_result *sres = NULL; + + if (lua_isstring (L, 3)) { + sres = rspamd_find_metric_result (task, lua_tostring (L, 3)); + + if (sres == NULL) { + return luaL_error (L, "invalid scan result: %s", + lua_tostring (L, 3)); + } + } + + /* Always push as a table for compatibility :( */ lua_createtable (L, 1, 0); if ((found = lua_push_symbol_result (L, task, symbol, - NULL, TRUE, FALSE))) { - lua_rawseti (L, -2, i++); + NULL, sres, TRUE, FALSE))) { + lua_rawseti (L, -2, 1); } else { /* Pop table */ @@ -4525,7 +4400,13 @@ lua_task_has_symbol (lua_State *L) symbol = luaL_checkstring (L, 2); if (task && symbol) { - found = (rspamd_task_find_symbol_result (task, symbol, NULL) != NULL); + if (lua_isstring (L, 3)) { + found = (rspamd_task_find_symbol_result (task, symbol, + rspamd_find_metric_result (task, lua_tostring (L, 3))) != NULL); + } + else { + found = (rspamd_task_find_symbol_result (task, symbol, NULL) != NULL); + } lua_pushboolean (L, found); } else { @@ -4642,7 +4523,7 @@ lua_task_get_symbols_all (lua_State *L) kh_foreach_value_ptr (mres->symbols, s, { if (!(s->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) { - lua_push_symbol_result (L, task, s->name, s, FALSE, TRUE); + lua_push_symbol_result (L, task, s->name, s, mres, FALSE, TRUE); lua_rawseti (L, -2, i++); } }); diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c index efd34dc6c..ed1eb1901 100644 --- a/src/lua/lua_url.c +++ b/src/lua/lua_url.c @@ -14,7 +14,8 @@ * limitations under the License. */ #include "lua_common.h" -#include "contrib/uthash/utlist.h" +#include "lua_url.h" + /*** * @module rspamd_url @@ -714,7 +715,7 @@ lua_url_to_table (lua_State *L) /*** - * @function url.create([mempool,] str) + * @function url.create([mempool,] str, [{flags_table}]) * @param {rspamd_mempool} memory pool for URL, e.g. 
`task:get_mempool()` * @param {string} text that contains URL (can also contain other stuff) * @return {url} new url object that exists as long as the corresponding mempool exists @@ -727,6 +728,7 @@ lua_url_create (lua_State *L) const gchar *text; size_t length; gboolean own_pool = FALSE; + struct rspamd_lua_url *u; if (lua_type (L, 1) == LUA_TUSERDATA) { pool = rspamd_lua_check_mempool (L, 1); @@ -752,6 +754,26 @@ lua_url_create (lua_State *L) if (lua_type (L, -1) != LUA_TUSERDATA) { /* URL is actually not found */ lua_pushnil (L); + + return 1; + } + + u = (struct rspamd_lua_url *)lua_touserdata (L, -1); + + if (lua_type (L, 3) == LUA_TTABLE) { + /* Add flags */ + for (lua_pushnil (L); lua_next (L, 3); lua_pop (L, 1)) { + int nmask = 0; + const gchar *fname = lua_tostring (L, -1); + + if (rspamd_url_flag_from_string (fname, &nmask)) { + u->url->flags |= nmask; + } + else { + lua_pop (L, 1); + return luaL_error (L, "invalid flag: %s", fname); + } + } } } @@ -853,9 +875,9 @@ lua_url_all (lua_State *L) * - `image`: URL is from src attribute of img HTML tag * @return {table} URL flags */ -#define PUSH_FLAG(fl, name) do { \ +#define PUSH_FLAG(fl) do { \ if (flags & (fl)) { \ - lua_pushstring (L, (name)); \ + lua_pushstring (L, rspamd_url_flag_to_string (fl)); \ lua_pushboolean (L, true); \ lua_settable (L, -3); \ } \ @@ -873,26 +895,27 @@ lua_url_get_flags (lua_State *L) lua_createtable (L, 0, 4); - PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED, "phished"); - PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC, "numeric"); - PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED, "obscured"); - PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED, "redirected"); - PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED, "html_displayed"); - PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT, "text"); - PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT, "subject"); - PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED, "host_encoded"); - PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED, "schema_encoded"); - PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED, "path_encoded"); - PUSH_FLAG 
(RSPAMD_URL_FLAG_QUERYENCODED, "query_encoded"); - PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES, "missing_slahes"); - PUSH_FLAG (RSPAMD_URL_FLAG_IDN, "idn"); - PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT, "has_port"); - PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER, "has_user"); - PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless"); - PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised"); - PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces"); - PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed"); - PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE, "image"); + PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED); + PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC); + PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED); + PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED); + PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED); + PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT); + PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT); + PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED); + PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED); + PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED); + PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED); + PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES); + PUSH_FLAG (RSPAMD_URL_FLAG_IDN); + PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT); + PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER); + PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS); + PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED); + PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES); + PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL); + PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE); + PUSH_FLAG (RSPAMD_URL_FLAG_CONTENT); } else { return luaL_error (L, "invalid arguments"); @@ -903,6 +926,243 @@ lua_url_get_flags (lua_State *L) #undef PUSH_FLAG +void +lua_tree_url_callback (gpointer key, gpointer value, gpointer ud) +{ + struct rspamd_lua_url *lua_url; + struct rspamd_url *url = (struct rspamd_url *)value; + struct lua_tree_cb_data *cb = ud; + + if ((url->protocol & cb->protocols_mask) && (url->flags == 0 || + (url->flags & cb->flags_mask))) { + + if (cb->skip_prob > 0) { + gdouble coin = rspamd_random_double_fast_seed (cb->xoroshiro_state); + + if (coin < cb->skip_prob) { + return; + } + } + + lua_url = 
lua_newuserdata (cb->L, sizeof (struct rspamd_lua_url)); + lua_pushvalue (cb->L, cb->metatable_pos); + lua_setmetatable (cb->L, -2); + lua_url->url = url; + lua_rawseti (cb->L, -2, cb->i++); + } +} + +gboolean +lua_url_cbdata_fill (lua_State *L, + gint pos, + struct lua_tree_cb_data *cbd, + guint default_protocols, + guint default_flags, + gsize max_urls) +{ + gint protocols_mask = 0; + + gint pos_arg_type = lua_type (L, pos); + guint flags_mask = default_flags; + + if (pos_arg_type == LUA_TBOOLEAN) { + protocols_mask = default_protocols; + if (lua_toboolean (L, 2)) { + protocols_mask |= PROTOCOL_MAILTO; + } + } + else if (pos_arg_type == LUA_TTABLE) { + if (rspamd_lua_geti (L, 1, pos) == LUA_TNIL) { + /* New method: indexed table */ + + lua_getfield (L, pos, "flags"); + if (lua_istable (L, -1)) { + for (lua_pushnil (L); lua_next (L, pos); lua_pop (L, 1)) { + int nmask = 0; + const gchar *fname = lua_tostring (L, -1); + + + if (rspamd_url_flag_from_string (fname, &nmask)) { + flags_mask |= nmask; + } + else { + msg_info ("bad url flag: %s", fname); + return FALSE; + } + } + } + else { + flags_mask |= default_flags; + } + lua_pop (L, 1); + + lua_getfield (L, pos, "protocols"); + if (lua_istable (L, -1)) { + for (lua_pushnil (L); lua_next (L, pos); lua_pop (L, 1)) { + int nmask; + const gchar *pname = lua_tostring (L, -1); + + nmask = rspamd_url_protocol_from_string (pname); + + if (nmask != PROTOCOL_UNKNOWN) { + protocols_mask |= nmask; + } + else { + msg_info ("bad url protocol: %s", pname); + return FALSE; + } + } + } + else { + protocols_mask = default_protocols; + } + lua_pop (L, 1); + + lua_getfield (L, pos, "emails"); + if (lua_isboolean (L, -1)) { + if (lua_toboolean (L, -1)) { + protocols_mask |= PROTOCOL_MAILTO; + } + } + lua_pop (L, 1); + + lua_getfield (L, pos, "images"); + if (lua_isboolean (L, -1)) { + if (lua_toboolean (L, -1)) { + flags_mask |= RSPAMD_URL_FLAG_IMAGE; + } + else { + flags_mask &= ~RSPAMD_URL_FLAG_IMAGE; + } + } + lua_pop (L, 1); + + 
lua_getfield (L, pos, "content"); + if (lua_isboolean (L, -1)) { + if (lua_toboolean (L, -1)) { + flags_mask |= RSPAMD_URL_FLAG_CONTENT; + } + else { + flags_mask &= ~RSPAMD_URL_FLAG_CONTENT; + } + } + lua_pop (L, 1); + + lua_getfield (L, pos, "max_urls"); + if (lua_isnumber (L, -1)) { + max_urls = lua_tonumber (L, -1); + } + lua_pop (L, 1); + } + else { + /* Plain table of the protocols */ + for (lua_pushnil (L); lua_next (L, pos); lua_pop (L, 1)) { + int nmask; + const gchar *pname = lua_tostring (L, -1); + + nmask = rspamd_url_protocol_from_string (pname); + + if (nmask != PROTOCOL_UNKNOWN) { + protocols_mask |= nmask; + } + else { + msg_info ("bad url protocol: %s", pname); + return FALSE; + } + } + } + + lua_pop (L, 1); /* After rspamd_lua_geti */ + } + else if (pos_arg_type == LUA_TSTRING) { + const gchar *plist = lua_tostring (L, pos); + gchar **strvec; + gchar * const *cvec; + + strvec = g_strsplit_set (plist, ",;", -1); + cvec = strvec; + + while (*cvec) { + int nmask; + + nmask = rspamd_url_protocol_from_string (*cvec); + + if (nmask != PROTOCOL_UNKNOWN) { + protocols_mask |= nmask; + } + else { + msg_info ("bad url protocol: %s", *cvec); + return FALSE; + } + + cvec ++; + } + + g_strfreev (strvec); + } + else if (pos_arg_type == LUA_TNONE || pos_arg_type == LUA_TNIL) { + protocols_mask = default_protocols; + flags_mask = default_flags; + } + else { + return FALSE; + } + + if (lua_type (L, pos + 1) == LUA_TBOOLEAN) { + if (lua_toboolean (L, pos + 1)) { + flags_mask |= RSPAMD_URL_FLAG_IMAGE; + } + else { + flags_mask &= ~RSPAMD_URL_FLAG_IMAGE; + } + } + + memset (cbd, 0, sizeof (*cbd)); + + cbd->i = 1; + cbd->L = L; + cbd->max_urls = max_urls; + cbd->protocols_mask = protocols_mask; + cbd->flags_mask = flags_mask; + + /* This needs to be removed from the stack */ + rspamd_lua_class_metatable (L, "rspamd{url}"); + cbd->metatable_pos = lua_gettop (L); + (void)lua_checkstack (L, cbd->metatable_pos + 4); + + return TRUE; +} + +void +lua_url_cbdata_dtor (struct 
lua_tree_cb_data *cbd) +{ + if (cbd->metatable_pos != -1) { + lua_remove (cbd->L, cbd->metatable_pos); + } +} + +gsize +lua_url_adjust_skip_prob (gdouble timestamp, + guchar *digest, + struct lua_tree_cb_data *cb, + gsize sz) +{ + if (cb->max_urls > 0 && sz > cb->max_urls) { + cb->skip_prob = 1.0 - ((gdouble)cb->max_urls) / (gdouble)sz; + /* + * Use task dependent probabilistic seed to ensure that + * consequent task:get_urls return the same list of urls + */ + memcpy (&cb->xoroshiro_state[0], &timestamp, + MIN (sizeof (cb->xoroshiro_state[0]), sizeof (timestamp))); + memcpy (&cb->xoroshiro_state[1], digest, + sizeof (cb->xoroshiro_state[1]) * 3); + sz = cb->max_urls; + } + + return sz; +} + static gint lua_load_url (lua_State * L) { diff --git a/src/lua/lua_url.h b/src/lua/lua_url.h new file mode 100644 index 000000000..0ea2186d8 --- /dev/null +++ b/src/lua/lua_url.h @@ -0,0 +1,75 @@ +/*- + * Copyright 2020 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef RSPAMD_LUA_URL_H +#define RSPAMD_LUA_URL_H + +#include "lua_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct lua_tree_cb_data { + lua_State *L; + int i; + int metatable_pos; + guint flags_mask; + guint protocols_mask; + gsize max_urls; + gdouble skip_prob; + guint64 xoroshiro_state[4]; +}; + +void lua_tree_url_callback (gpointer key, gpointer value, gpointer ud); + +/** + * Fills a cbdata table based on the parameter at position pos + * @param L + * @param pos + * @param cbd + * @return + */ +gboolean lua_url_cbdata_fill (lua_State *L, gint pos, + struct lua_tree_cb_data *cbd, + guint default_protocols, + guint default_flags, + gsize max_urls); + +/** + * Cleanup url cbdata + * @param cbd + */ +void lua_url_cbdata_dtor (struct lua_tree_cb_data *cbd); + +/** + * Adjust probabilistic skip of the urls + * @param timestamp + * @param digest + * @param cb + * @param sz + * @param max_urls + * @return + */ +gsize lua_url_adjust_skip_prob (gdouble timestamp, + guchar *digest, + struct lua_tree_cb_data *cb, + gsize sz); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 04413f2f6..5948f4b4b 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -153,18 +153,6 @@ LUA_FUNCTION_DEF (util, parse_html); LUA_FUNCTION_DEF (util, levenshtein_distance); /*** - * @function util.parse_addr(str) - * Parse rfc822 address to components. Returns a table of components: - * - * - `name`: name of address (e.g. Some User) - * - `addr`: address part (e.g. 
user@example.com) - * - * @param {string} str input string - * @return {table} resulting table of components - */ -LUA_FUNCTION_DEF (util, parse_addr); - -/*** * @function util.fold_header(name, value, [how, [stop_chars]]) * Fold rfc822 header according to the folding rules * @@ -213,13 +201,22 @@ LUA_FUNCTION_DEF (util, get_tld); LUA_FUNCTION_DEF (util, glob); /*** - * @function util.parse_mail_address(str, pool) + * @function util.parse_mail_address(str, [pool]) * Parses email address and returns a table of tables in the following format: * + * - `raw` - the original value without any processing * - `name` - name of internet address in UTF8, e.g. for `Vsevolod Stakhov <blah@foo.com>` it returns `Vsevolod Stakhov` * - `addr` - address part of the address * - `user` - user part (if present) of the address, e.g. `blah` * - `domain` - domain part (if present), e.g. `foo.com` + * - `flags` - table with following keys set to true if given condition fulfilled: + * - [valid] - valid SMTP address in conformity with https://tools.ietf.org/html/rfc5321#section-4.1. 
+ * - [ip] - domain is IPv4/IPv6 address + * - [braced] - angled `<blah@foo.com>` address + * - [quoted] - quoted user part + * - [empty] - empty address + * - [backslash] - user part contains backslash + * - [8bit] - contains 8bit characters * * @param {string} str input string * @param {rspamd_mempool} pool memory pool to use @@ -650,12 +647,12 @@ static const struct luaL_reg utillib_f[] = { LUA_INTERFACE_DEF (util, tanh), LUA_INTERFACE_DEF (util, parse_html), LUA_INTERFACE_DEF (util, levenshtein_distance), - LUA_INTERFACE_DEF (util, parse_addr), LUA_INTERFACE_DEF (util, fold_header), LUA_INTERFACE_DEF (util, is_uppercase), LUA_INTERFACE_DEF (util, humanize_number), LUA_INTERFACE_DEF (util, get_tld), LUA_INTERFACE_DEF (util, glob), + {"parse_addr", lua_util_parse_mail_address}, LUA_INTERFACE_DEF (util, parse_mail_address), LUA_INTERFACE_DEF (util, strlen_utf8), LUA_INTERFACE_DEF (util, lower_utf8), @@ -1469,51 +1466,6 @@ lua_util_levenshtein_distance (lua_State *L) } static gint -lua_util_parse_addr (lua_State *L) -{ - LUA_TRACE_POINT; - GPtrArray *addrs; - gsize len; - const gchar *str = luaL_checklstring (L, 1, &len); - rspamd_mempool_t *pool; - gboolean own_pool = FALSE; - - if (str) { - - if (lua_type (L, 2) == LUA_TUSERDATA) { - pool = rspamd_lua_check_mempool (L, 2); - - if (pool == NULL) { - return luaL_error (L, "invalid arguments"); - } - } - else { - pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), - "lua util", 0); - own_pool = TRUE; - } - - addrs = rspamd_email_address_from_mime (pool, str, len, NULL); - - if (addrs == NULL) { - lua_pushnil (L); - } - else { - lua_push_emails_address_list (L, addrs, 0); - } - - if (own_pool) { - rspamd_mempool_delete (pool); - } - } - else { - lua_pushnil (L); - } - - return 1; -} - -static gint lua_util_fold_header (lua_State *L) { LUA_TRACE_POINT; diff --git a/src/plugins/lua/force_actions.lua b/src/plugins/lua/force_actions.lua index 108c0b76e..9654a836e 100644 --- a/src/plugins/lua/force_actions.lua +++ 
b/src/plugins/lua/force_actions.lua @@ -23,12 +23,14 @@ end local E = {} local N = 'force_actions' +local selector_cache = {} local fun = require "fun" local lua_util = require "lua_util" local rspamd_cryptobox_hash = require "rspamd_cryptobox_hash" local rspamd_expression = require "rspamd_expression" local rspamd_logger = require "rspamd_logger" +local lua_selectors = require "lua_selectors" local function gen_cb(expr, act, pool, message, subject, raction, honor, limit, least) @@ -63,6 +65,29 @@ local function gen_cb(expr, act, pool, message, subject, raction, honor, limit, return function(task) + local function process_message_selectors(repl, selector_expr) + -- create/reuse selector to extract value for this placeholder + local selector = selector_cache[selector_expr] + if not selector then + selector_cache[selector_expr] = lua_selectors.create_selector_closure(rspamd_config, selector_expr, '', true) + selector = selector_cache[selector_expr] + if not selector then + rspamd_logger.errx(task, 'could not create selector [%1]', selector_expr) + return "((could not create selector))" + end + end + local extracted = selector(task) + if extracted then + if type(extracted) == 'table' then + extracted = table.concat(extracted, ',') + end + else + rspamd_logger.errx(task, 'could not extract value with selector [%1]', selector_expr) + extracted = '((error extracting value))' + end + return extracted + end + local cact = task:get_metric_action('default') if cact == act then return false @@ -83,8 +108,9 @@ local function gen_cb(expr, act, pool, message, subject, raction, honor, limit, if least then flags = "least" end if type(message) == 'string' then + -- process selector expressions in the message + message = string.gsub(message, '(${(.-)})', process_message_selectors) task:set_pre_result(act, message, N, nil, nil, flags) - else task:set_pre_result(act, nil, N, nil, nil, flags) end diff --git a/src/plugins/lua/rbl.lua b/src/plugins/lua/rbl.lua index 026432404..430afaf34 
100644 --- a/src/plugins/lua/rbl.lua +++ b/src/plugins/lua/rbl.lua @@ -282,7 +282,12 @@ end local function gen_rbl_callback(rule) local function is_whitelisted(task, req, req_str, whitelist, what) - if rule.ignore_whitelist then return false end + if rule.ignore_whitelist then + lua_util.debugm(N, task, + 'ignore whitelisting checks to %s by %s: ignore whitelist is being set', + req_str, rule.symbol) + return false + end if rule.whitelist then if rule.whitelist:get_key(req) then @@ -296,12 +301,18 @@ local function gen_rbl_callback(rule) -- Maybe whitelisted by some other rbl rule if whitelist then - local wl_what = whitelist[req_str] - if wl_what then + local wl = whitelist[req_str] + if wl then lua_util.debugm(N, task, - 'whitelisted %s on %s by %s rbl rule (%s checked)', - req_str, wl_what, what) - return wl_what == what + 'whitelisted request to %s by %s (%s) rbl rule (%s checked type, %s whitelist type)', + req_str, wl.type, wl.symbol, what, wl.type) + if wl.type == what then + -- This was decided to be a bad idea as in case of whitelisting a request to blacklist + -- is not even sent + --task:adjust_result(wl.symbol, 0.0 / 0.0, rule.symbol) + + return true + end end end @@ -944,8 +955,12 @@ local function add_rbl(key, rbl, global_opts) name = prefix .. '_' .. rbl.symbol, } end - if not rbl.is_whitelist and rbl.ignore_whitelist == false then + if not (rbl.is_whitelist or rbl.ignore_whitelist) then table.insert(black_symbols, rbl.symbol .. '_CHECK') + else + lua_util.debugm(N, rspamd_config, 'rule %s ignores whitelists: rbl.is_whitelist = %s, ' .. 
+ 'rbl.ignore_whitelist = %s', + rbl.symbol, rbl.is_whitelist, rbl.ignore_whitelist) end else id = rspamd_config:register_symbol{ @@ -954,8 +969,12 @@ local function add_rbl(key, rbl, global_opts) name = rbl.symbol, flags = table.concat(flags_tbl, ',') } - if not rbl.is_whitelist and rbl.ignore_whitelist == false then + if not (rbl.is_whitelist or rbl.ignore_whitelist) then table.insert(black_symbols, rbl.symbol) + else + lua_util.debugm(N, rspamd_config, 'rule %s ignores whitelists: rbl.is_whitelist = %s, ' .. + 'rbl.ignore_whitelist = %s', + rbl.symbol, rbl.is_whitelist, rbl.ignore_whitelist) end end @@ -1004,7 +1023,7 @@ local function add_rbl(key, rbl, global_opts) table.insert(white_symbols, s) end else - if rbl.ignore_whitelist == false then + if not rbl.ignore_whitelist then table.insert(black_symbols, s) end end @@ -1191,7 +1210,10 @@ local function rbl_callback_white(task) lua_util.debugm(N, task,'found whitelist from %s: %s(%s)', w, elt, what) if elt and what then - whitelisted_elements[elt] = what + whitelisted_elements[elt] = { + type = what, + symbol = w, + } end end end diff --git a/src/plugins/lua/replies.lua b/src/plugins/lua/replies.lua index 6c3459b4f..9161ee043 100644 --- a/src/plugins/lua/replies.lua +++ b/src/plugins/lua/replies.lua @@ -23,6 +23,7 @@ local rspamd_logger = require 'rspamd_logger' local hash = require 'rspamd_cryptobox_hash' local lua_util = require 'lua_util' local lua_redis = require 'lua_redis' +local fun = require "fun" -- A plugin that implements replies check using redis @@ -65,16 +66,21 @@ end local function replies_check(task) local function check_recipient(stored_rcpt) - local real_rcpt = task:get_principal_recipient() + local rcpts = task:get_recipients('mime') - if real_rcpt then - local real_rcpt_h = make_key(real_rcpt:lower(), 8) - if real_rcpt_h == stored_rcpt then + if rcpts then + local predicate = function(input_rcpt) + local real_rcpt_h = make_key(input_rcpt:lower(), 8) + + return real_rcpt_h == stored_rcpt + 
end + + if fun.any(predicate, rcpts) then return true end - rspamd_logger.infox(task, 'ignoring reply as recipient %s is not matching hash %s', - real_rcpt, stored_rcpt) + rspamd_logger.infox(task, 'ignoring reply as no recipients are matching hash %s', + stored_rcpt) else rspamd_logger.infox(task, 'ignoring reply as recipient cannot be detected for hash %s', stored_rcpt) @@ -158,8 +164,8 @@ local function replies_set(task) key, -- hash key true, -- is write redis_set_cb, --callback - 'SETEX', -- command - {key, tostring(math.floor(settings['expire'])), value:lower()} -- arguments + 'PSETEX', -- command + {key, tostring(math.floor(settings['expire'] * 1000)), value:lower()} -- arguments ) if not ret then rspamd_logger.errx(task, "redis request wasn't scheduled") diff --git a/src/plugins/lua/spamassassin.lua b/src/plugins/lua/spamassassin.lua index 2ba0b6228..798578856 100644 --- a/src/plugins/lua/spamassassin.lua +++ b/src/plugins/lua/spamassassin.lua @@ -232,7 +232,7 @@ local function handle_header_def(hline, cur_rule) fun.each(function(func) if func == 'addr' then cur_param['function'] = function(str) - local addr_parsed = util.parse_addr(str) + local addr_parsed = util.parse_mail_address(str) local ret = {} if addr_parsed then for _,elt in ipairs(addr_parsed) do @@ -246,7 +246,7 @@ local function handle_header_def(hline, cur_rule) end elseif func == 'name' then cur_param['function'] = function(str) - local addr_parsed = util.parse_addr(str) + local addr_parsed = util.parse_mail_address(str) local ret = {} if addr_parsed then for _,elt in ipairs(addr_parsed) do diff --git a/src/plugins/lua/url_redirector.lua b/src/plugins/lua/url_redirector.lua index ba7d77649..8572bb9af 100644 --- a/src/plugins/lua/url_redirector.lua +++ b/src/plugins/lua/url_redirector.lua @@ -146,13 +146,13 @@ local function cache_url(task, orig_url, url, key, param) true, -- is write redis_set_cb, --callback 'SETEX', -- command - {key, tostring(settings.expire), url} -- arguments + {key, 
tostring(settings.expire), str_url} -- arguments ) if not ret then rspamd_logger.errx(task, 'cannot make redis request to cache results') else - conn:add_cmd('ZINCRBY', {settings.top_urls_key, '1', url}) + conn:add_cmd('ZINCRBY', {settings.top_urls_key, '1', str_url}) end end diff --git a/test/functional/cases/300_rbl.robot b/test/functional/cases/300_rbl.robot index ef858558c..9a386619f 100644 --- a/test/functional/cases/300_rbl.robot +++ b/test/functional/cases/300_rbl.robot @@ -35,6 +35,7 @@ RBL RECEIVED HIT RBL FROM HIT WL ${result} = Scan Message With Rspamc ${MESSAGE} -i 4.3.2.4 Check Rspamc ${result} FAKE_RBL_CODE_2 inverse=True + Should Contain ${result.stdout} FAKE_WL_RBL_CODE_2 (1.00)[4.3.2.4:from] EMAILBL Compose Map 1 ${result} = Scan Message With Rspamc ${TESTDIR}/messages/url14.eml @@ -47,7 +48,7 @@ EMAILBL Compose Map 2 EMAILBL Compose Map 3 ${result} = Scan Message With Rspamc ${TESTDIR}/messages/url16.eml Should Contain ${result.stdout} RSPAMD_EMAILBL (0.00)[41.black.sanchez.com:email] - + *** Keywords *** Rbl Setup diff --git a/test/functional/configs/rbl.conf b/test/functional/configs/rbl.conf index da5c59b53..2318f5b81 100644 --- a/test/functional/configs/rbl.conf +++ b/test/functional/configs/rbl.conf @@ -34,6 +34,7 @@ rbl { RSPAMD_EMAILBL { rbl = "test8.uribl"; url_compose_map = "${TESTDIR}/configs/maps/url_compose_map_for_mails.list"; + ignore_defaults = true; emails = true; emails_domainonly = true returncodes = { diff --git a/utils/sa_trivial_convert.lua b/utils/sa_trivial_convert.lua index 4725dabd5..56a01adfa 100644 --- a/utils/sa_trivial_convert.lua +++ b/utils/sa_trivial_convert.lua @@ -52,7 +52,7 @@ local function handle_header_def(hline, cur_rule) fun.each(function(func) if func == 'addr' then cur_param['function'] = function(str) - local addr_parsed = util.parse_addr(str) + local addr_parsed = util.parse_mail_address(str) local ret = {} if addr_parsed then for _,elt in ipairs(addr_parsed) do @@ -66,7 +66,7 @@ local function 
handle_header_def(hline, cur_rule) end elseif func == 'name' then cur_param['function'] = function(str) - local addr_parsed = util.parse_addr(str) + local addr_parsed = util.parse_mail_address(str) local ret = {} if addr_parsed then for _,elt in ipairs(addr_parsed) do |