diff options
Diffstat (limited to 'src/lua')
-rw-r--r-- | src/lua/lua_common.c | 52 | ||||
-rw-r--r-- | src/lua/lua_common.h | 10 | ||||
-rw-r--r-- | src/lua/lua_config.c | 686 | ||||
-rw-r--r-- | src/lua/lua_cryptobox.c | 2 | ||||
-rw-r--r-- | src/lua/lua_html.cxx | 269 | ||||
-rw-r--r-- | src/lua/lua_http.c | 123 | ||||
-rw-r--r-- | src/lua/lua_logger.c | 515 | ||||
-rw-r--r-- | src/lua/lua_map.c | 161 | ||||
-rw-r--r-- | src/lua/lua_mimepart.c | 39 | ||||
-rw-r--r-- | src/lua/lua_parsers.c | 85 | ||||
-rw-r--r-- | src/lua/lua_task.c | 82 | ||||
-rw-r--r-- | src/lua/lua_util.c | 164 |
12 files changed, 1813 insertions, 375 deletions
diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index 3a0f1a06c..f36228680 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -2401,7 +2401,7 @@ rspamd_lua_try_load_redis(lua_State *L, const ucl_object_t *obj, return FALSE; } -void rspamd_lua_push_full_word(lua_State *L, rspamd_stat_token_t *w) +void rspamd_lua_push_full_word(lua_State *L, rspamd_word_t *w) { int fl_cnt; @@ -2521,6 +2521,54 @@ int rspamd_lua_push_words(lua_State *L, GArray *words, return 1; } +int rspamd_lua_push_words_kvec(lua_State *L, rspamd_words_t *words, + enum rspamd_lua_words_type how) +{ + rspamd_word_t *w; + unsigned int i, cnt; + + if (!words || !words->a) { + lua_createtable(L, 0, 0); + return 1; + } + + lua_createtable(L, kv_size(*words), 0); + + for (i = 0, cnt = 1; i < kv_size(*words); i++) { + w = &kv_A(*words, i); + + switch (how) { + case RSPAMD_LUA_WORDS_STEM: + if (w->stemmed.len > 0) { + lua_pushlstring(L, w->stemmed.begin, w->stemmed.len); + lua_rawseti(L, -2, cnt++); + } + break; + case RSPAMD_LUA_WORDS_NORM: + if (w->normalized.len > 0) { + lua_pushlstring(L, w->normalized.begin, w->normalized.len); + lua_rawseti(L, -2, cnt++); + } + break; + case RSPAMD_LUA_WORDS_RAW: + if (w->original.len > 0) { + lua_pushlstring(L, w->original.begin, w->original.len); + lua_rawseti(L, -2, cnt++); + } + break; + case RSPAMD_LUA_WORDS_FULL: + rspamd_lua_push_full_word(L, w); + /* Push to the resulting vector */ + lua_rawseti(L, -2, cnt++); + break; + default: + break; + } + } + + return 1; +} + char * rspamd_lua_get_module_name(lua_State *L) { @@ -2658,4 +2706,4 @@ int rspamd_lua_geti(lua_State *L, int pos, int i) return lua_type(L, -1); } -#endif
\ No newline at end of file +#endif diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h index a29444394..d494f0923 100644 --- a/src/lua/lua_common.h +++ b/src/lua/lua_common.h @@ -538,9 +538,8 @@ enum lua_logger_escape_type { * @param len * @return */ -gsize lua_logger_out_type(lua_State *L, int pos, char *outbuf, - gsize len, struct lua_logger_trace *trace, - enum lua_logger_escape_type esc_type); +gsize lua_logger_out(lua_State *L, int pos, char *outbuf, gsize len, + enum lua_logger_escape_type esc_type); /** * Safely checks userdata to match specified class @@ -633,7 +632,7 @@ struct rspamd_stat_token_s; * @param L * @param word */ -void rspamd_lua_push_full_word(lua_State *L, struct rspamd_stat_token_s *word); +void rspamd_lua_push_full_word(lua_State *L, rspamd_word_t *word); enum rspamd_lua_words_type { RSPAMD_LUA_WORDS_STEM = 0, @@ -652,6 +651,9 @@ enum rspamd_lua_words_type { int rspamd_lua_push_words(lua_State *L, GArray *words, enum rspamd_lua_words_type how); +int rspamd_lua_push_words_kvec(lua_State *L, rspamd_words_t *words, + enum rspamd_lua_words_type how); + /** * Returns newly allocated name for caller module name * @param L diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c index 07ed58ad5..7e8ee39f2 100644 --- a/src/lua/lua_config.c +++ b/src/lua/lua_config.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,11 +19,16 @@ #include "src/libserver/composites/composites.h" #include "libserver/cfg_file_private.h" #include "libmime/lang_detection.h" +#include "libserver/re_cache.h" #include "lua/lua_map.h" #include "lua/lua_thread_pool.h" #include "utlist.h" #include <math.h> +/* Forward declarations for custom tokenizer functions */ +gboolean rspamd_config_load_custom_tokenizers(struct rspamd_config *cfg, GError **err); +void rspamd_config_unload_custom_tokenizers(struct rspamd_config *cfg); + /*** * This module is used to configure rspamd and is normally available as global * variable named `rspamd_config`. Unlike other modules, it is not necessary to @@ -118,7 +123,7 @@ local function foo(task) end */ /*** -* @method rspamd_config:radix_from_ucl(obj) +* @method rspamd_config:radix_from_ucl(obj, description) * Creates new embedded map of IP/mask addresses from object. * @param {ucl} obj object * @return {map} radix tree object @@ -273,7 +278,7 @@ rspamd_config:register_dependency(id, 'OTHER_SYM') -- Alternative form -- Symbol MY_RULE needs result from SPF_CHECK rspamd_config:register_dependency('MY_RULE', 'SPF_CHECK') - */ + */ LUA_FUNCTION_DEF(config, register_dependency); /*** @@ -471,6 +476,13 @@ LUA_FUNCTION_DEF(config, get_group_symbols); LUA_FUNCTION_DEF(config, get_groups); /*** + * @method rspamd_config:promote_symbols_cache_resort() + * Promote symbols cache resort after dynamic symbol registration + * @return {boolean} true if successful + */ +LUA_FUNCTION_DEF(config, promote_symbols_cache_resort); + +/*** * @method rspamd_config:register_settings_id(name, symbols_enabled, symbols_disabled) * Register new static settings id in config * @param {string} name id name (not numeric!) 
@@ -558,6 +570,119 @@ LUA_FUNCTION_DEF(config, register_regexp); LUA_FUNCTION_DEF(config, replace_regexp); /*** + * @method rspamd_config:register_regexp_scoped(scope, params) + * Registers new re for further cached usage in a specific scope + * Params is the table with the following fields (mandatory fields are marked with `*`): + * - `re`* : regular expression object + * - `type`*: type of regular expression: + * + `mime`: mime regexp + * + `rawmime`: raw mime regexp + * + `header`: header regexp + * + `rawheader`: raw header expression + * + `body`: raw body regexp + * + `url`: url regexp + * - `header`: for header and rawheader regexp means the name of header + * - `pcre_only`: flag regexp as pcre only regexp + * @param {string} scope scope name for the regexp + * @param {table} params regexp parameters + */ +LUA_FUNCTION_DEF(config, register_regexp_scoped); + +/*** + * @method rspamd_config:replace_regexp_scoped(scope, params) + * Replaces regexp with a new one in a specific scope + * Params is the table with the following fields (mandatory fields are marked with `*`): + * - `old_re`* : old regular expression object (must be in the cache) + * - `new_re`* : new regular expression object (must not be in the cache) + * - `pcre_only`: flag regexp as pcre only regexp + * @param {string} scope scope name for the regexp + * @param {table} params regexp parameters + */ +LUA_FUNCTION_DEF(config, replace_regexp_scoped); + +/*** + * @method rspamd_config:register_re_selector_scoped(scope, name, selector_str, [delimiter, [flatten]]) + * Registers selector with the specific name in a specific scope to use in regular expressions + * @param {string} scope scope name for the selector + * @param {string} name name of the selector + * @param {string} selector_str selector definition + * @param {string} delimiter delimiter to use when joining strings if flatten is false + * @param {bool} flatten if true then selector will return a table of captures instead of a single string + * 
@return true if selector has been registered + */ +LUA_FUNCTION_DEF(config, register_re_selector_scoped); + +/*** + * @method rspamd_config:find_regexp_scope(scope) + * Checks if a regexp scope exists + * @param {string} scope scope name to check (can be nil for default scope) + * @return {boolean} true if scope exists + */ +LUA_FUNCTION_DEF(config, find_regexp_scope); + +/*** + * @method rspamd_config:remove_regexp_scope(scope) + * Removes a regexp scope from the cache + * @param {string} scope scope name to remove + * @return {boolean} true if scope was removed successfully + */ +LUA_FUNCTION_DEF(config, remove_regexp_scope); + +/*** + * @method rspamd_config:count_regexp_scopes() + * Returns the number of regexp scopes + * @return {number} number of scopes + */ +LUA_FUNCTION_DEF(config, count_regexp_scopes); + +/*** + * @method rspamd_config:list_regexp_scopes() + * Returns a list of all regexp scope names + * @return {table} array of scope names (default scope is named "default") + */ +LUA_FUNCTION_DEF(config, list_regexp_scopes); + +/*** + * @method rspamd_config:set_regexp_scope_flags(scope, flags) + * Sets flags for a regexp scope + * @param {string} scope scope name (can be nil for default scope) + * @param {number} flags flags to set + */ +LUA_FUNCTION_DEF(config, set_regexp_scope_flags); + +/*** + * @method rspamd_config:clear_regexp_scope_flags(scope, flags) + * Clears flags for a regexp scope + * @param {string} scope scope name (can be nil for default scope) + * @param {number} flags flags to clear + */ +LUA_FUNCTION_DEF(config, clear_regexp_scope_flags); + +/*** + * @method rspamd_config:get_regexp_scope_flags(scope) + * Gets flags for a regexp scope + * @param {string} scope scope name (can be nil for default scope) + * @return {number} current flags value + */ +LUA_FUNCTION_DEF(config, get_regexp_scope_flags); + +/*** + * @method rspamd_config:is_regexp_scope_loaded(scope) + * Checks if a regexp scope is loaded and ready for use + * @param {string} 
scope scope name (can be nil for default scope) + * @return {boolean} true if scope is loaded + */ +LUA_FUNCTION_DEF(config, is_regexp_scope_loaded); + +/*** + * @method rspamd_config:set_regexp_scope_loaded(scope, loaded) + * Sets the loaded state of a regexp scope + * @param {string} scope scope name (can be nil for default scope) + * @param {boolean} loaded whether scope should be marked as loaded (defaults to true) + */ +LUA_FUNCTION_DEF(config, set_regexp_scope_loaded); + +/*** * @method rspamd_config:register_worker_script(worker_type, script) * Registers the following script for workers of a specified type. The exact type * of script function depends on worker type @@ -862,6 +987,19 @@ LUA_FUNCTION_DEF(config, get_dns_max_requests); */ LUA_FUNCTION_DEF(config, get_dns_timeout); +/*** + * @method rspamd_config:load_custom_tokenizers() + * Loads custom tokenizers from configuration + * @return {boolean} true if successful + */ +LUA_FUNCTION_DEF(config, load_custom_tokenizers); + +/*** + * @method rspamd_config:unload_custom_tokenizers() + * Unloads custom tokenizers and frees memory + */ +LUA_FUNCTION_DEF(config, unload_custom_tokenizers); + static const struct luaL_reg configlib_m[] = { LUA_INTERFACE_DEF(config, get_module_opt), LUA_INTERFACE_DEF(config, get_mempool), @@ -882,6 +1020,7 @@ static const struct luaL_reg configlib_m[] = { LUA_INTERFACE_DEF(config, register_callback_symbol), LUA_INTERFACE_DEF(config, register_callback_symbol_priority), LUA_INTERFACE_DEF(config, register_dependency), + LUA_INTERFACE_DEF(config, promote_symbols_cache_resort), LUA_INTERFACE_DEF(config, register_settings_id), LUA_INTERFACE_DEF(config, get_symbol_flags), LUA_INTERFACE_DEF(config, set_metric_symbol), @@ -903,6 +1042,18 @@ static const struct luaL_reg configlib_m[] = { LUA_INTERFACE_DEF(config, disable_symbol), LUA_INTERFACE_DEF(config, register_regexp), LUA_INTERFACE_DEF(config, replace_regexp), + LUA_INTERFACE_DEF(config, register_regexp_scoped), + 
LUA_INTERFACE_DEF(config, replace_regexp_scoped), + LUA_INTERFACE_DEF(config, register_re_selector_scoped), + LUA_INTERFACE_DEF(config, find_regexp_scope), + LUA_INTERFACE_DEF(config, remove_regexp_scope), + LUA_INTERFACE_DEF(config, count_regexp_scopes), + LUA_INTERFACE_DEF(config, list_regexp_scopes), + LUA_INTERFACE_DEF(config, set_regexp_scope_flags), + LUA_INTERFACE_DEF(config, clear_regexp_scope_flags), + LUA_INTERFACE_DEF(config, get_regexp_scope_flags), + LUA_INTERFACE_DEF(config, is_regexp_scope_loaded), + LUA_INTERFACE_DEF(config, set_regexp_scope_loaded), LUA_INTERFACE_DEF(config, register_worker_script), LUA_INTERFACE_DEF(config, register_re_selector), LUA_INTERFACE_DEF(config, add_on_load), @@ -937,6 +1088,8 @@ static const struct luaL_reg configlib_m[] = { LUA_INTERFACE_DEF(config, get_tld_path), LUA_INTERFACE_DEF(config, get_dns_max_requests), LUA_INTERFACE_DEF(config, get_dns_timeout), + LUA_INTERFACE_DEF(config, load_custom_tokenizers), + LUA_INTERFACE_DEF(config, unload_custom_tokenizers), {"__tostring", rspamd_lua_class_tostring}, {"__newindex", lua_config_newindex}, {NULL, NULL}}; @@ -4268,6 +4421,23 @@ lua_config_experimental_enabled(lua_State *L) return 1; } +static int +lua_config_promote_symbols_cache_resort(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + + if (cfg != NULL && cfg->cache != NULL) { + rspamd_symcache_promote_resort(cfg->cache); + lua_pushboolean(L, true); + } + else { + return luaL_error(L, "invalid arguments or cache not initialized"); + } + + return 1; +} + struct rspamd_lua_include_trace_cbdata { lua_State *L; int cbref; @@ -4485,11 +4655,14 @@ lua_config_init_subsystem(lua_State *L) nparts = g_strv_length(parts); for (i = 0; i < nparts; i++) { - if (strcmp(parts[i], "filters") == 0) { + const char *str = parts[i]; + + /* TODO: rework this strcmp-based subsystem dispatch chain */ + if (strcmp(str, "filters") == 0) { rspamd_lua_post_load_config(cfg); rspamd_init_filters(cfg, false, false); } - else if 
(strcmp(parts[i], "langdet") == 0) { + else if (strcmp(str, "langdet") == 0) { if (!cfg->lang_det) { cfg->lang_det = rspamd_language_detector_init(cfg); rspamd_mempool_add_destructor(cfg->cfg_pool, @@ -4497,10 +4670,10 @@ lua_config_init_subsystem(lua_State *L) cfg->lang_det); } } - else if (strcmp(parts[i], "stat") == 0) { + else if (strcmp(str, "stat") == 0) { rspamd_stat_init(cfg, NULL); } - else if (strcmp(parts[i], "dns") == 0) { + else if (strcmp(str, "dns") == 0) { struct ev_loop *ev_base = lua_check_ev_base(L, 3); if (ev_base) { @@ -4514,11 +4687,25 @@ lua_config_init_subsystem(lua_State *L) return luaL_error(L, "no event base specified"); } } - else if (strcmp(parts[i], "symcache") == 0) { + else if (strcmp(str, "symcache") == 0) { rspamd_symcache_init(cfg->cache); } + else if (strcmp(str, "tokenizers") == 0 || strcmp(str, "custom_tokenizers") == 0) { + GError *err = NULL; + if (!rspamd_config_load_custom_tokenizers(cfg, &err)) { + g_strfreev(parts); + if (err) { + int ret = luaL_error(L, "failed to load custom tokenizers: %s", err->message); + g_error_free(err); + return ret; + } + else { + return luaL_error(L, "failed to load custom tokenizers"); + } + } + } else { - int ret = luaL_error(L, "invalid param: %s", parts[i]); + int ret = luaL_error(L, "invalid param: %s", str); g_strfreev(parts); return ret; @@ -4750,6 +4937,10 @@ void luaopen_config(lua_State *L) rspamd_lua_new_class(L, rspamd_monitored_classname, monitoredlib_m); lua_pop(L, 1); + + /* Export constants */ + lua_pushinteger(L, RSPAMD_RE_CACHE_FLAG_LOADED); + lua_setglobal(L, "RSPAMD_RE_CACHE_FLAG_LOADED"); } void lua_call_finish_script(struct rspamd_config_cfg_lua_script *sc, @@ -4772,3 +4963,480 @@ void lua_call_finish_script(struct rspamd_config_cfg_lua_script *sc, lua_thread_call(thread, 1); } + +static int +lua_config_load_custom_tokenizers(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + + if (cfg != NULL) { + GError *err = NULL; + gboolean ret 
= rspamd_config_load_custom_tokenizers(cfg, &err); + + if (!ret && err) { + lua_pushboolean(L, FALSE); + lua_pushstring(L, err->message); + g_error_free(err); + return 2; + } + + lua_pushboolean(L, ret); + return 1; + } + else { + return luaL_error(L, "invalid arguments"); + } +} + +static int +lua_config_unload_custom_tokenizers(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + + if (cfg != NULL) { + rspamd_config_unload_custom_tokenizers(cfg); + return 0; + } + else { + return luaL_error(L, "invalid arguments"); + } +} + +static int +lua_config_register_regexp_scoped(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = luaL_checkstring(L, 2); + struct rspamd_lua_regexp *re = NULL; + rspamd_regexp_t *cache_re; + const char *type_str = NULL, *header_str = NULL; + gsize header_len = 0; + GError *err = NULL; + enum rspamd_re_type type = RSPAMD_RE_BODY; + gboolean pcre_only = FALSE; + + /* + * - `scope`*: scope name for the regexp + * - `re`* : regular expression object + * - `type`*: type of regular expression: + * + `mime`: mime regexp + * + `rawmime`: raw mime regexp + * + `header`: header regexp + * + `rawheader`: raw header expression + * + `body`: raw body regexp + * + `url`: url regexp + * - `header`: for header and rawheader regexp means the name of header + * - `pcre_only`: allow merely pcre for this regexp + */ + if (cfg != NULL && scope != NULL) { + if (!rspamd_lua_parse_table_arguments(L, 3, &err, + RSPAMD_LUA_PARSE_ARGUMENTS_DEFAULT, + "*re=U{regexp};*type=S;header=S;pcre_only=B", + &re, &type_str, &header_str, &pcre_only)) { + msg_err_config("cannot get parameters list: %e", err); + + if (err) { + g_error_free(err); + } + } + else { + type = rspamd_re_cache_type_from_string(type_str); + + if ((type == RSPAMD_RE_HEADER || + type == RSPAMD_RE_RAWHEADER || + type == RSPAMD_RE_MIMEHEADER) && + header_str == NULL) { + msg_err_config( + "header argument is 
mandatory for header/rawheader regexps"); + } + else { + if (pcre_only) { + rspamd_regexp_set_flags(re->re, + rspamd_regexp_get_flags(re->re) | RSPAMD_REGEXP_FLAG_PCRE_ONLY); + } + + if (header_str != NULL) { + /* Include the last \0 */ + header_len = strlen(header_str) + 1; + } + + cache_re = rspamd_re_cache_add_scoped(&cfg->re_cache, scope, re->re, type, + (gpointer) header_str, header_len, -1); + + /* + * XXX: here are dragons! + * Actually, lua regexp contains internal rspamd_regexp_t + * and it owns it. + * However, after this operation we have some OTHER regexp, + * which we really would like to use. + * So we do the following: + * 1) Remove old re and unref it + * 2) Replace the internal re with cached one + * 3) Increase its refcount to share ownership between cache and + * lua object + */ + if (cache_re != re->re) { + rspamd_regexp_unref(re->re); + re->re = rspamd_regexp_ref(cache_re); + + if (pcre_only) { + rspamd_regexp_set_flags(re->re, + rspamd_regexp_get_flags(re->re) | RSPAMD_REGEXP_FLAG_PCRE_ONLY); + } + } + } + } + } + + return 0; +} + +static int +lua_config_replace_regexp_scoped(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = luaL_checkstring(L, 2); + struct rspamd_lua_regexp *old_re = NULL, *new_re = NULL; + gboolean pcre_only = FALSE; + GError *err = NULL; + + if (cfg != NULL && scope != NULL) { + if (!rspamd_lua_parse_table_arguments(L, 3, &err, + RSPAMD_LUA_PARSE_ARGUMENTS_DEFAULT, + "*old_re=U{regexp};*new_re=U{regexp};pcre_only=B", + &old_re, &new_re, &pcre_only)) { + int ret = luaL_error(L, "cannot get parameters list: %s", + err ? 
err->message : "invalid arguments"); + + if (err) { + g_error_free(err); + } + + return ret; + } + else { + + if (pcre_only) { + rspamd_regexp_set_flags(new_re->re, + rspamd_regexp_get_flags(new_re->re) | RSPAMD_REGEXP_FLAG_PCRE_ONLY); + } + + rspamd_re_cache_replace_scoped(&cfg->re_cache, scope, old_re->re, new_re->re); + } + } + + return 0; +} + +static int +lua_config_register_re_selector_scoped(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = luaL_checkstring(L, 2); + const char *name = luaL_checkstring(L, 3); + const char *selector_str = luaL_checkstring(L, 4); + const char *delimiter = ""; + bool flatten = false; + int top = lua_gettop(L); + bool res = false; + + if (cfg && scope && name && selector_str) { + if (lua_gettop(L) >= 5) { + delimiter = luaL_checkstring(L, 5); + + if (lua_isboolean(L, 6)) { + flatten = lua_toboolean(L, 6); + } + } + + if (luaL_dostring(L, "return require \"lua_selectors\"") != 0) { + msg_warn_config("cannot require lua_selectors: %s", + lua_tostring(L, -1)); + } + else { + if (lua_type(L, -1) != LUA_TTABLE) { + msg_warn_config("lua selectors must return " + "table and not %s", + lua_typename(L, lua_type(L, -1))); + } + else { + lua_pushstring(L, "create_selector_closure"); + lua_gettable(L, -2); + + if (lua_type(L, -1) != LUA_TFUNCTION) { + msg_warn_config("create_selector_closure must return " + "function and not %s", + lua_typename(L, lua_type(L, -1))); + } + else { + int err_idx, ret; + struct rspamd_config **pcfg; + + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); + + /* Push function */ + lua_pushvalue(L, -2); + + pcfg = lua_newuserdata(L, sizeof(*pcfg)); + rspamd_lua_setclass(L, rspamd_config_classname, -1); + *pcfg = cfg; + lua_pushstring(L, selector_str); + lua_pushstring(L, delimiter); + lua_pushboolean(L, flatten); + + if ((ret = lua_pcall(L, 4, 1, err_idx)) != 0) { + msg_err_config("call to create_selector_closure lua " + "script 
failed (%d): %s", + ret, + lua_tostring(L, -1)); + } + else { + if (lua_type(L, -1) != LUA_TFUNCTION) { + msg_warn_config("create_selector_closure " + "invocation must return " + "function and not %s", + lua_typename(L, lua_type(L, -1))); + } + else { + ret = luaL_ref(L, LUA_REGISTRYINDEX); + rspamd_re_cache_add_selector_scoped(&cfg->re_cache, scope, + name, ret); + res = true; + } + } + } + } + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + lua_settop(L, top); + lua_pushboolean(L, res); + + if (res) { + msg_info_config("registered regexp selector %s for scope %s", name, scope); + } + + return 1; +} + +static int +lua_config_find_regexp_scope(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = NULL; + + if (cfg) { + if (lua_type(L, 2) == LUA_TSTRING) { + scope = lua_tostring(L, 2); + } + /* scope can be NULL for default scope */ + + struct rspamd_re_cache *found_cache = rspamd_re_cache_find_scope(cfg->re_cache, scope); + lua_pushboolean(L, found_cache != NULL); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int +lua_config_remove_regexp_scope(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = luaL_checkstring(L, 2); + + if (cfg && scope) { + gboolean result = rspamd_re_cache_remove_scope(&cfg->re_cache, scope); + lua_pushboolean(L, result); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int +lua_config_count_regexp_scopes(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + + if (cfg) { + unsigned int count = rspamd_re_cache_count_scopes(cfg->re_cache); + lua_pushinteger(L, count); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int +lua_config_list_regexp_scopes(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + + if (cfg) 
{ + struct rspamd_re_cache *scope; + unsigned int i = 0; + + lua_newtable(L); + + for (scope = rspamd_re_cache_scope_first(cfg->re_cache); + scope != NULL; + scope = rspamd_re_cache_scope_next(scope)) { + lua_pushinteger(L, i + 1); + lua_pushstring(L, rspamd_re_cache_scope_name(scope)); + lua_settable(L, -3); + i++; + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int +lua_config_set_regexp_scope_flags(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = NULL; + unsigned int flags = 0; + + if (cfg) { + if (lua_type(L, 2) == LUA_TSTRING) { + scope = lua_tostring(L, 2); + } + flags = lua_tointeger(L, 3); + + rspamd_re_cache_set_flags(cfg->re_cache, scope, flags); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 0; +} + +static int +lua_config_clear_regexp_scope_flags(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = NULL; + unsigned int flags = 0; + + if (cfg) { + if (lua_type(L, 2) == LUA_TSTRING) { + scope = lua_tostring(L, 2); + } + flags = lua_tointeger(L, 3); + + rspamd_re_cache_clear_flags(cfg->re_cache, scope, flags); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 0; +} + +static int +lua_config_get_regexp_scope_flags(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = NULL; + + if (cfg) { + if (lua_type(L, 2) == LUA_TSTRING) { + scope = lua_tostring(L, 2); + } + + unsigned int flags = rspamd_re_cache_get_flags(cfg->re_cache, scope); + lua_pushinteger(L, flags); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int +lua_config_is_regexp_scope_loaded(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = NULL; + + if (cfg) { + if (lua_type(L, 2) == LUA_TSTRING) { + scope = lua_tostring(L, 2); + } 
+ + gboolean loaded = rspamd_re_cache_is_loaded(cfg->re_cache, scope); + lua_pushboolean(L, loaded); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int +lua_config_set_regexp_scope_loaded(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = NULL; + gboolean loaded = TRUE; + + if (cfg) { + if (lua_type(L, 2) == LUA_TSTRING) { + scope = lua_tostring(L, 2); + } + if (lua_type(L, 3) == LUA_TBOOLEAN) { + loaded = lua_toboolean(L, 3); + } + + if (loaded) { + rspamd_re_cache_set_flags(cfg->re_cache, scope, RSPAMD_RE_CACHE_FLAG_LOADED); + + /* When marking a scope as loaded, we also need to initialize it + * to compute the hash for each re_class */ + struct rspamd_re_cache *target_cache = rspamd_re_cache_find_scope(cfg->re_cache, scope); + if (target_cache) { + rspamd_re_cache_init(target_cache, cfg); + } + } + else { + rspamd_re_cache_clear_flags(cfg->re_cache, scope, RSPAMD_RE_CACHE_FLAG_LOADED); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 0; +} diff --git a/src/lua/lua_cryptobox.c b/src/lua/lua_cryptobox.c index 721d71256..2c2254920 100644 --- a/src/lua/lua_cryptobox.c +++ b/src/lua/lua_cryptobox.c @@ -404,7 +404,7 @@ lua_cryptobox_keypair_load(lua_State *L) if (lua_type(L, 1) == LUA_TSTRING) { buf = luaL_checklstring(L, 1, &len); if (buf != NULL) { - parser = ucl_parser_new(0); + parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS); if (!ucl_parser_add_chunk(parser, buf, len)) { msg_err("cannot open keypair from data: %s", diff --git a/src/lua/lua_html.cxx b/src/lua/lua_html.cxx index 090e2af55..9b0deed45 100644 --- a/src/lua/lua_html.cxx +++ b/src/lua/lua_html.cxx @@ -179,6 +179,44 @@ LUA_FUNCTION_DEF(html_tag, get_style); */ LUA_FUNCTION_DEF(html_tag, get_attribute); +/*** + * @method html_tag:get_all_attributes() + * Returns table of all attributes for the element + * @return {table} table with attribute names as keys and values as strings 
+ */ +LUA_FUNCTION_DEF(html_tag, get_all_attributes); + +/*** + * @method html_tag:get_unknown_attributes() + * Returns table of unknown/unrecognized attributes for the element + * @return {table} table with unknown attribute names as keys and values as strings + */ +LUA_FUNCTION_DEF(html_tag, get_unknown_attributes); + +/*** + * @method html_tag:get_children() + * Returns array of child tags for the element + * @return {table} array of child html_tag objects + */ +LUA_FUNCTION_DEF(html_tag, get_children); + +/*** + * @method html_tag:has_attribute(name) + * Checks if element has the specified attribute + * @param {string} name attribute name to check + * @return {boolean} true if attribute exists + */ +LUA_FUNCTION_DEF(html_tag, has_attribute); + +/*** + * @method html_tag:get_numeric_attribute(name) + * Returns numeric value of attribute (if supported and parseable) + * Works for attributes like width, height, font-size, etc. + * @param {string} name attribute name + * @return {number|nil} numeric value or nil if not numeric/parseable + */ +LUA_FUNCTION_DEF(html_tag, get_numeric_attribute); + static const struct luaL_reg taglib_m[] = { LUA_INTERFACE_DEF(html_tag, get_type), LUA_INTERFACE_DEF(html_tag, get_extra), @@ -188,6 +226,11 @@ static const struct luaL_reg taglib_m[] = { LUA_INTERFACE_DEF(html_tag, get_content_length), LUA_INTERFACE_DEF(html_tag, get_style), LUA_INTERFACE_DEF(html_tag, get_attribute), + LUA_INTERFACE_DEF(html_tag, get_all_attributes), + LUA_INTERFACE_DEF(html_tag, get_unknown_attributes), + LUA_INTERFACE_DEF(html_tag, get_children), + LUA_INTERFACE_DEF(html_tag, has_attribute), + LUA_INTERFACE_DEF(html_tag, get_numeric_attribute), {"__tostring", rspamd_lua_class_tostring}, {NULL, NULL}}; @@ -704,6 +747,29 @@ lua_html_tag_get_style(lua_State *L) } static int +lua_html_tag_get_all_attributes(lua_State *L) +{ + LUA_TRACE_POINT; + struct lua_html_tag *ltag = lua_check_html_tag(L, 1); + + if (ltag) { + auto all_attrs = 
ltag->tag->get_all_attributes(); + lua_createtable(L, 0, all_attrs.size()); + + for (const auto &[name, value]: all_attrs) { + lua_pushlstring(L, name.data(), name.size()); + lua_pushlstring(L, value.data(), value.size()); + lua_settable(L, -3); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int lua_html_tag_get_attribute(lua_State *L) { LUA_TRACE_POINT; @@ -712,8 +778,7 @@ lua_html_tag_get_attribute(lua_State *L) const char *attr_name = luaL_checklstring(L, 2, &slen); if (ltag && attr_name) { - auto maybe_attr = ltag->tag->find_component( - rspamd::html::html_component_from_string({attr_name, slen})); + auto maybe_attr = ltag->tag->find_component_by_name({attr_name, slen}); if (maybe_attr) { lua_pushlstring(L, maybe_attr->data(), maybe_attr->size()); @@ -729,6 +794,206 @@ lua_html_tag_get_attribute(lua_State *L) return 1; } +static int +lua_html_tag_get_unknown_attributes(lua_State *L) +{ + LUA_TRACE_POINT; + struct lua_html_tag *ltag = lua_check_html_tag(L, 1); + + if (ltag) { + auto unknown_attrs = ltag->tag->get_unknown_components(); + lua_createtable(L, 0, unknown_attrs.size()); + + for (const auto &[name, value]: unknown_attrs) { + lua_pushlstring(L, name.data(), name.size()); + lua_pushlstring(L, value.data(), value.size()); + lua_settable(L, -3); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int +lua_html_tag_get_children(lua_State *L) +{ + LUA_TRACE_POINT; + struct lua_html_tag *ltag = lua_check_html_tag(L, 1); + + if (ltag) { + lua_createtable(L, ltag->tag->children.size(), 0); + + for (int i = 0; i < ltag->tag->children.size(); i++) { + auto *child_tag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(lua_html_tag))); + child_tag->tag = ltag->tag->children[i]; + child_tag->html = ltag->html; + rspamd_lua_setclass(L, rspamd_html_tag_classname, -1); + lua_rawseti(L, -2, i + 1); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} 
+ +static int +lua_html_tag_has_attribute(lua_State *L) +{ + LUA_TRACE_POINT; + struct lua_html_tag *ltag = lua_check_html_tag(L, 1); + gsize slen; + const char *attr_name = luaL_checklstring(L, 2, &slen); + + if (ltag && attr_name) { + auto maybe_attr = ltag->tag->find_component_by_name({attr_name, slen}); + lua_pushboolean(L, maybe_attr.has_value()); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int +lua_html_tag_get_numeric_attribute(lua_State *L) +{ + LUA_TRACE_POINT; + struct lua_html_tag *ltag = lua_check_html_tag(L, 1); + gsize slen; + const char *attr_name = luaL_checklstring(L, 2, &slen); + + if (ltag && attr_name) { + std::string_view name_view{attr_name, slen}; + + // Check for numeric components + if (name_view == "width") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_width>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + return 1; + } + } + } + else if (name_view == "height") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_height>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + return 1; + } + } + } + else if (name_view == "size") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_size>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + return 1; + } + } + } + else if (name_view == "font-size") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_font_size>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + return 1; + } + } + } + else if (name_view == "line-height") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_line_height>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + 
return 1; + } + } + } + else if (name_view == "border-width") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_border_width>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + return 1; + } + } + } + else if (name_view == "opacity") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_opacity>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushnumber(L, numeric_val.value()); + return 1; + } + } + } + else if (name_view == "min-width") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_min_width>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + return 1; + } + } + } + else if (name_view == "max-width") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_max_width>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + return 1; + } + } + } + else if (name_view == "min-height") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_min_height>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + return 1; + } + } + } + else if (name_view == "max-height") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_max_height>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + return 1; + } + } + } + else if (name_view == "cellpadding") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_cellpadding>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + return 1; + } + } + } + else if (name_view == "cellspacing") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_cellspacing>()) { + if (auto numeric_val = 
comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + return 1; + } + } + } + else if (name_view == "tabindex") { + if (auto comp = ltag->tag->find_component<rspamd::html::html_component_tabindex>()) { + if (auto numeric_val = comp.value()->get_numeric_value()) { + lua_pushinteger(L, numeric_val.value()); + return 1; + } + } + } + + lua_pushnil(L); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + void luaopen_html(lua_State *L) { rspamd_lua_new_class(L, rspamd_html_classname, htmllib_m); diff --git a/src/lua/lua_http.c b/src/lua/lua_http.c index 7e9e7b1df..731b8b057 100644 --- a/src/lua/lua_http.c +++ b/src/lua/lua_http.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,22 +29,123 @@ * This module hides all complexity: DNS resolving, sessions management, zero-copy * text transfers and so on under the hood. 
* @example +-- Basic GET request with callback local rspamd_http = require "rspamd_http" local function symbol_callback(task) local function http_callback(err_message, code, body, headers) task:insert_result('SYMBOL', 1) -- task is available via closure + + if err_message then + -- Handle error + return + end + + -- Process response + if code == 200 then + -- Process body and headers + for name, value in pairs(headers) do + -- Headers are lowercase + end + end end - rspamd_http.request({ - task=task, - url='http://example.com/data', - body=task:get_content(), - callback=http_callback, - headers={Header='Value', OtherHeader='Value'}, - mime_type='text/plain', - }) - end + rspamd_http.request({ + task=task, + url='http://example.com/data', + body=task:get_content(), + callback=http_callback, + headers={Header='Value', OtherHeader='Value', DuplicatedHeader={'Multiple', 'Values'}}, + mime_type='text/plain', + }) +end + +-- POST request with JSON body +local function post_json_example(task) + local ucl = require "ucl" + local data = { + id = task:get_queue_id(), + sender = task:get_from()[1].addr + } + + local json_data = ucl.to_json(data) + + rspamd_http.request({ + task = task, + url = "http://example.com/api/submit", + method = "POST", + body = json_data, + headers = {['Content-Type'] = 'application/json'}, + callback = function(err, code, body, headers) + if not err and code == 200 then + -- Success + end + end + }) +end + +-- Synchronous HTTP request (using coroutines) +local function sync_http_example(task) + -- No callback makes this a synchronous call + local err, response = rspamd_http.request({ + task = task, + url = "http://example.com/api/data", + method = "GET", + timeout = 10.0 + }) + + if not err then + -- Response is a table with code, content, and headers + if response.code == 200 then + -- Process response.content + return true + end + end + return false +end + +-- Using authentication +local function auth_example(task) + rspamd_http.request({ + task = 
task, + url = "https://example.com/api/protected", + method = "GET", + user = "username", + password = "secret", + callback = function(err, code, body, headers) + -- Process authenticated response + end + }) +end + +-- Using HTTPS with SSL options +local function https_example(task) + rspamd_http.request({ + task = task, + url = "https://example.com/api/secure", + method = "GET", + no_ssl_verify = false, -- Verify SSL (default) + callback = function(err, code, body, headers) + -- Process secure response + end + }) +end + +-- Using keep-alive and gzip +local function advanced_example(task) + rspamd_http.request({ + task = task, + url = "http://example.com/api/data", + method = "POST", + body = task:get_content(), + gzip = true, -- Compress request body + keepalive = true, -- Use keep-alive connection + max_size = 1024 * 1024, -- Limit response to 1MB + callback = function(err, code, body, headers) + -- Process response + end + }) +end */ #define MAX_HEADERS_SIZE 8192 @@ -602,7 +703,7 @@ lua_http_push_headers(lua_State *L, struct rspamd_http_message *msg) * @param {string} url specifies URL for a request in the standard URI form (e.g. 'http://example.com/path') * @param {function} callback specifies callback function in format `function (err_message, code, body, headers)` that is called on HTTP request completion. 
if this parameter is missing, the function performs "pseudo-synchronous" call (see [Synchronous and Asynchronous API overview](/doc/developers/sync_async.html#API-example-http-module) * @param {task} task if called from symbol handler it is generally a good idea to use the common task objects: event base, DNS resolver and events session - * @param {table} headers optional headers in form `[name='value', name='value']` + * @param {table} headers optional headers in form `[name='value']` or `[name=['value1', 'value2']]` to duplicate a header with multiple values * @param {string} mime_type MIME type of the HTTP content (for example, `text/html`) * @param {string/text} body full body content, can be opaque `rspamd{text}` to avoid data copying * @param {number} timeout floating point request timeout value in seconds (default is 5.0 seconds) diff --git a/src/lua/lua_logger.c b/src/lua/lua_logger.c index 004b82e72..04ff81b6d 100644 --- a/src/lua/lua_logger.c +++ b/src/lua/lua_logger.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -174,6 +174,11 @@ static const struct luaL_reg loggerlib_f[] = { {"__tostring", rspamd_lua_class_tostring}, {NULL, NULL}}; +static gsize +lua_logger_out_type(lua_State *L, int pos, char *outbuf, + gsize len, struct lua_logger_trace *trace, + enum lua_logger_escape_type esc_type); + static void lua_common_log_line(GLogLevelFlags level, lua_State *L, @@ -203,23 +208,19 @@ lua_common_log_line(GLogLevelFlags level, d.currentline); } - rspamd_common_log_function(NULL, - level, - module, - uid, - func_buf, - "%s", - msg); + p = func_buf; } else { - rspamd_common_log_function(NULL, - level, - module, - uid, - G_STRFUNC, - "%s", - msg); + p = (char *) G_STRFUNC; } + + rspamd_common_log_function(NULL, + level, + module, + uid, + p, + "%s", + msg); } /*** Logger interface ***/ @@ -279,105 +280,161 @@ lua_logger_char_safe(int t, unsigned int esc_type) return true; } -static gsize -lua_logger_out_str(lua_State *L, int pos, - char *outbuf, gsize len, - struct lua_logger_trace *trace, - enum lua_logger_escape_type esc_type) +#define LUA_MAX_ARGS 32 +/* Gracefully handles argument mismatches by substituting missing args and noting extra args */ +static glong +lua_logger_log_format_str(lua_State *L, int offset, char *logbuf, gsize remain, + const char *fmt, + enum lua_logger_escape_type esc_type) { - gsize slen, flen; - const char *str = lua_tolstring(L, pos, &slen); - static const char hexdigests[16] = "0123456789abcdef"; - gsize r = 0, s; - - if (str) { - gboolean normal = TRUE; - flen = MIN(slen, len - 1); + const char *c; + gsize r; + int digit; + char *d = logbuf; + unsigned int arg_num, cur_arg = 0, arg_max = lua_gettop(L) - offset; + gboolean args_used[LUA_MAX_ARGS]; + unsigned int used_args_count = 0; + + memset(args_used, 0, sizeof(args_used)); + while (remain > 1 && *fmt) { + if (*fmt == '%') { + ++fmt; + c = fmt; + if (*fmt == 's') { + ++fmt; + ++cur_arg; + } + else { + arg_num = 0; + while ((digit = g_ascii_digit_value(*fmt)) >= 0) { + ++fmt; + arg_num = arg_num * 
10 + digit; + if (arg_num >= LUA_MAX_ARGS) { + /* Avoid ridiculously large numbers */ + fmt = c; + break; + } + } - for (r = 0; r < flen; r++) { - if (!lua_logger_char_safe(str[r], esc_type)) { - normal = FALSE; - break; + if (fmt > c) { + /* Update the current argument */ + cur_arg = arg_num; + } } - } - if (normal) { - r = rspamd_strlcpy(outbuf, str, flen + 1); - } - else { - /* Need to escape non-printed characters */ - r = 0; - s = 0; - - while (slen > 0 && len > 1) { - if (!lua_logger_char_safe(str[s], esc_type)) { - if (len >= 3) { - outbuf[r++] = '\\'; - outbuf[r++] = hexdigests[((str[s] >> 4) & 0xF)]; - outbuf[r++] = hexdigests[((str[s]) & 0xF)]; - - len -= 2; - } - else { - outbuf[r++] = '?'; - } + if (fmt > c) { + if (cur_arg < 1 || cur_arg > arg_max) { + /* Missing argument - substitute placeholder */ + r = rspamd_snprintf(d, remain, "<MISSING ARGUMENT>"); } else { - outbuf[r++] = str[s]; + /* Valid argument - output it */ + r = lua_logger_out(L, offset + cur_arg, d, remain, esc_type); + /* Track which arguments are used */ + if (cur_arg <= LUA_MAX_ARGS && !args_used[cur_arg - 1]) { + args_used[cur_arg - 1] = TRUE; + used_args_count++; + } } - s++; - slen--; - len--; + g_assert(r < remain); + remain -= r; + d += r; + continue; } - outbuf[r] = '\0'; + /* Copy % */ + --fmt; } + + *d++ = *fmt++; + --remain; } - return r; + /* Check for extra arguments and append warning if any */ + if (used_args_count > 0 && used_args_count < arg_max && remain > 1) { + unsigned int extra_args = arg_max - used_args_count; + r = rspamd_snprintf(d, remain, " <EXTRA %d ARGUMENTS>", (int) extra_args); + remain -= r; + d += r; + } + + *d = 0; + + return d - logbuf; } +#undef LUA_MAX_ARGS + static gsize -lua_logger_out_num(lua_State *L, int pos, char *outbuf, gsize len, - struct lua_logger_trace *trace) +lua_logger_out_str(lua_State *L, int pos, + char *outbuf, gsize len, + enum lua_logger_escape_type esc_type) { - double num = lua_tonumber(L, pos); - glong inum; - gsize r = 0; + 
static const char hexdigests[16] = "0123456789abcdef"; + gsize slen; + const unsigned char *str = lua_tolstring(L, pos, &slen); + unsigned char c; + char *out = outbuf; - if ((double) (glong) num == num) { - inum = num; - r = rspamd_snprintf(outbuf, len + 1, "%l", inum); + if (str) { + while (slen > 0 && len > 1) { + c = *str++; + if (lua_logger_char_safe(c, esc_type)) { + *out++ = c; + } + else if (len > 3) { + /* Need to escape non-printed characters */ + *out++ = '\\'; + *out++ = hexdigests[c >> 4]; + *out++ = hexdigests[c & 0xF]; + len -= 2; + } + else { + *out++ = '?'; + } + --slen; + --len; + } } - else { - r = rspamd_snprintf(outbuf, len + 1, "%f", num); + *out = 0; + + return out - outbuf; +} + +static gsize +lua_logger_out_num(lua_State *L, int pos, char *outbuf, gsize len) +{ + double num = lua_tonumber(L, pos); + glong inum = (glong) num; + + if ((double) inum == num) { + return rspamd_snprintf(outbuf, len, "%l", inum); } - return r; + return rspamd_snprintf(outbuf, len, "%f", num); } static gsize -lua_logger_out_boolean(lua_State *L, int pos, char *outbuf, gsize len, - struct lua_logger_trace *trace) +lua_logger_out_boolean(lua_State *L, int pos, char *outbuf, gsize len) { gboolean val = lua_toboolean(L, pos); - gsize r = 0; - r = rspamd_strlcpy(outbuf, val ? "true" : "false", len + 1); - - return r; + return rspamd_snprintf(outbuf, len, val ? 
"true" : "false"); } static gsize -lua_logger_out_userdata(lua_State *L, int pos, char *outbuf, gsize len, - struct lua_logger_trace *trace) +lua_logger_out_userdata(lua_State *L, int pos, char *outbuf, gsize len) { - int r = 0, top; + gsize r = 0; + int top; const char *str = NULL; gboolean converted_to_str = FALSE; top = lua_gettop(L); + if (pos < 0) { + pos += top + 1; /* Convert to absolute */ + } if (!lua_getmetatable(L, pos)) { return 0; @@ -396,26 +453,17 @@ lua_logger_out_userdata(lua_State *L, int pos, char *outbuf, gsize len, if (lua_isfunction(L, -1)) { lua_pushvalue(L, pos); - if (lua_pcall(L, 1, 1, 0) != 0) { - lua_settop(L, top); - - return 0; - } - - str = lua_tostring(L, -1); - - if (str) { - r = rspamd_snprintf(outbuf, len, "%s", str); + if (lua_pcall(L, 1, 1, 0) == 0) { + str = lua_tostring(L, -1); + if (str) { + r = rspamd_snprintf(outbuf, len, "%s", str); + } } - - lua_settop(L, top); - - return r; } } lua_settop(L, top); - return 0; + return r; } lua_pushstring(L, "__tostring"); @@ -460,12 +508,12 @@ lua_logger_out_userdata(lua_State *L, int pos, char *outbuf, gsize len, return r; } -#define MOVE_BUF(d, remain, r) \ - (d) += (r); \ - (remain) -= (r); \ - if ((remain) == 0) { \ - lua_settop(L, old_top); \ - break; \ +#define MOVE_BUF(d, remain, r) \ + (d) += (r); \ + (remain) -= (r); \ + if ((remain) <= 1) { \ + lua_settop(L, top); \ + goto table_oob; \ } static gsize @@ -473,169 +521,153 @@ lua_logger_out_table(lua_State *L, int pos, char *outbuf, gsize len, struct lua_logger_trace *trace, enum lua_logger_escape_type esc_type) { - char *d = outbuf; - gsize remain = len, r; + char *d = outbuf, *str; + gsize remain = len; + glong r; gboolean first = TRUE; gconstpointer self = NULL; - int i, tpos, last_seq = -1, old_top; + int i, last_seq = 0, top; + double num; + glong inum; - if (!lua_istable(L, pos) || remain == 0) { - return 0; - } + /* Type and length checks are done in logger_out_type() */ - old_top = lua_gettop(L); self = lua_topointer(L, 
pos); /* Check if we have seen this pointer */ for (i = 0; i < TRACE_POINTS; i++) { if (trace->traces[i] == self) { - r = rspamd_snprintf(d, remain + 1, "ref(%p)", self); - - d += r; - - return (d - outbuf); + if ((trace->cur_level + TRACE_POINTS - 1) % TRACE_POINTS == i) { + return rspamd_snprintf(d, remain, "__self"); + } + return rspamd_snprintf(d, remain, "ref(%p)", self); } } trace->traces[trace->cur_level % TRACE_POINTS] = self; + ++trace->cur_level; - lua_pushvalue(L, pos); - r = rspamd_snprintf(d, remain + 1, "{"); - remain -= r; - d += r; + top = lua_gettop(L); + if (pos < 0) { + pos += top + 1; /* Convert to absolute */ + } + + r = rspamd_snprintf(d, remain, "{"); + MOVE_BUF(d, remain, r); /* Get numeric keys (ipairs) */ for (i = 1;; i++) { - lua_rawgeti(L, -1, i); + lua_rawgeti(L, pos, i); if (lua_isnil(L, -1)) { lua_pop(L, 1); + last_seq = i; break; } - last_seq = i; - - if (!first) { - r = rspamd_snprintf(d, remain + 1, ", "); - MOVE_BUF(d, remain, r); - } - - r = rspamd_snprintf(d, remain + 1, "[%d] = ", i); - MOVE_BUF(d, remain, r); - tpos = lua_gettop(L); - - if (lua_topointer(L, tpos) == self) { - r = rspamd_snprintf(d, remain + 1, "__self"); + if (first) { + first = FALSE; + str = "[%d] = "; } else { - r = lua_logger_out_type(L, tpos, d, remain, trace, esc_type); + str = ", [%d] = "; } + r = rspamd_snprintf(d, remain, str, i); + MOVE_BUF(d, remain, r); + + r = lua_logger_out_type(L, -1, d, remain, trace, esc_type); MOVE_BUF(d, remain, r); - first = FALSE; lua_pop(L, 1); } /* Get string keys (pairs) */ - for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) { + for (lua_pushnil(L); lua_next(L, pos); lua_pop(L, 1)) { /* 'key' is at index -2 and 'value' is at index -1 */ - if (lua_type(L, -2) == LUA_TNUMBER) { - if (last_seq > 0) { - lua_pushvalue(L, -2); - if (lua_tonumber(L, -1) <= last_seq + 1) { - lua_pop(L, 1); + /* Preserve key */ + lua_pushvalue(L, -2); + if (last_seq > 0) { + if (lua_type(L, -1) == LUA_TNUMBER) { + num = lua_tonumber(L, -1); 
/* no conversion here */ + inum = (glong) num; + if ((double) inum == num && inum > 0 && inum < last_seq) { /* Already seen */ + lua_pop(L, 1); continue; } - - lua_pop(L, 1); } } - if (!first) { - r = rspamd_snprintf(d, remain + 1, ", "); - MOVE_BUF(d, remain, r); - } - - /* Preserve key */ - lua_pushvalue(L, -2); - r = rspamd_snprintf(d, remain + 1, "[%s] = ", - lua_tostring(L, -1)); - lua_pop(L, 1); /* Remove key */ - MOVE_BUF(d, remain, r); - tpos = lua_gettop(L); - - if (lua_topointer(L, tpos) == self) { - r = rspamd_snprintf(d, remain + 1, "__self"); + if (first) { + first = FALSE; + str = "[%2] = %1"; } else { - r = lua_logger_out_type(L, tpos, d, remain, trace, esc_type); + str = ", [%2] = %1"; } + r = lua_logger_log_format_str(L, top + 1, d, remain, str, esc_type); + /* lua_logger_log_format_str now handles errors gracefully */ MOVE_BUF(d, remain, r); - first = FALSE; + /* Remove key */ + lua_pop(L, 1); } - lua_settop(L, old_top); - - r = rspamd_snprintf(d, remain + 1, "}"); + r = rspamd_snprintf(d, remain, "}"); d += r; +table_oob: + --trace->cur_level; + return (d - outbuf); } #undef MOVE_BUF -gsize lua_logger_out_type(lua_State *L, int pos, - char *outbuf, gsize len, - struct lua_logger_trace *trace, - enum lua_logger_escape_type esc_type) +static gsize +lua_logger_out_type(lua_State *L, int pos, + char *outbuf, gsize len, + struct lua_logger_trace *trace, + enum lua_logger_escape_type esc_type) { - int type; - gsize r = 0; - if (len == 0) { return 0; } - type = lua_type(L, pos); - trace->cur_level++; + int type = lua_type(L, pos); switch (type) { case LUA_TNUMBER: - r = lua_logger_out_num(L, pos, outbuf, len, trace); - break; + return lua_logger_out_num(L, pos, outbuf, len); case LUA_TBOOLEAN: - r = lua_logger_out_boolean(L, pos, outbuf, len, trace); - break; + return lua_logger_out_boolean(L, pos, outbuf, len); case LUA_TTABLE: - r = lua_logger_out_table(L, pos, outbuf, len, trace, esc_type); - break; + return lua_logger_out_table(L, pos, outbuf, len, 
trace, esc_type); case LUA_TUSERDATA: - r = lua_logger_out_userdata(L, pos, outbuf, len, trace); - break; + return lua_logger_out_userdata(L, pos, outbuf, len); case LUA_TFUNCTION: - r = rspamd_snprintf(outbuf, len + 1, "function"); - break; + return rspamd_snprintf(outbuf, len, "function"); case LUA_TLIGHTUSERDATA: - r = rspamd_snprintf(outbuf, len + 1, "0x%p", lua_topointer(L, pos)); - break; + return rspamd_snprintf(outbuf, len, "0x%p", lua_topointer(L, pos)); case LUA_TNIL: - r = rspamd_snprintf(outbuf, len + 1, "nil"); - break; + return rspamd_snprintf(outbuf, len, "nil"); case LUA_TNONE: - r = rspamd_snprintf(outbuf, len + 1, "no value"); - break; - default: - /* Try to push everything as string using tostring magic */ - r = lua_logger_out_str(L, pos, outbuf, len, trace, esc_type); - break; + return rspamd_snprintf(outbuf, len, "no value"); } - trace->cur_level--; + /* Try to push everything as string using tostring magic */ + return lua_logger_out_str(L, pos, outbuf, len, esc_type); +} - return r; +gsize lua_logger_out(lua_State *L, int pos, + char *outbuf, gsize len, + enum lua_logger_escape_type esc_type) +{ + struct lua_logger_trace tr; + memset(&tr, 0, sizeof(tr)); + + return lua_logger_out_type(L, pos, outbuf, len, &tr, esc_type); } static const char * @@ -731,72 +763,13 @@ static gboolean lua_logger_log_format(lua_State *L, int fmt_pos, gboolean is_string, char *logbuf, gsize remain) { - char *d; - const char *s, *c; - gsize r; - unsigned int arg_num, arg_max, cur_arg; - struct lua_logger_trace tr; - int digit; - - s = lua_tostring(L, fmt_pos); - if (s == NULL) { + const char *fmt = lua_tostring(L, fmt_pos); + if (fmt == NULL) { return FALSE; } - arg_max = (unsigned int) lua_gettop(L) - fmt_pos; - d = logbuf; - cur_arg = 0; - - while (remain > 0 && *s) { - if (*s == '%') { - ++s; - c = s; - if (*s == 's') { - ++s; - ++cur_arg; - } else { - arg_num = 0; - while ((digit = g_ascii_digit_value(*s)) >= 0) { - ++s; - arg_num = arg_num * 10 + digit; - if 
(arg_num >= 100) { - /* Avoid ridiculously large numbers */ - s = c; - break; - } - } - - if (s > c) { - /* Update the current argument */ - cur_arg = arg_num; - } - } - - if (s > c) { - if (cur_arg < 1 || cur_arg > arg_max) { - msg_err("wrong argument number: %ud", cur_arg); - return FALSE; - } - - memset(&tr, 0, sizeof(tr)); - r = lua_logger_out_type(L, fmt_pos + cur_arg, d, remain, &tr, - is_string ? LUA_ESCAPE_UNPRINTABLE : LUA_ESCAPE_LOG); - g_assert(r <= remain); - remain -= r; - d += r; - continue; - } - - /* Copy % */ - --s; - } - - *d++ = *s++; - --remain; - } - - *d = '\0'; - + /* lua_logger_log_format_str now handles argument mismatches gracefully */ + lua_logger_log_format_str(L, fmt_pos, logbuf, remain, fmt, is_string ? LUA_ESCAPE_UNPRINTABLE : LUA_ESCAPE_LOG); return TRUE; } @@ -808,15 +781,10 @@ lua_logger_do_log(lua_State *L, { char logbuf[RSPAMD_LOGBUF_SIZE - 128]; const char *uid = NULL; - int fmt_pos = start_pos; int ret; - GError *err = NULL; - if (lua_type(L, start_pos) == LUA_TSTRING) { - fmt_pos = start_pos; - } - else if (lua_type(L, start_pos) == LUA_TUSERDATA) { - fmt_pos = start_pos + 1; + if (lua_type(L, start_pos) == LUA_TUSERDATA) { + GError *err = NULL; uid = lua_logger_get_id(L, start_pos, &err); @@ -830,15 +798,17 @@ lua_logger_do_log(lua_State *L, return ret; } + + ++start_pos; } - else { + + if (lua_type(L, start_pos) != LUA_TSTRING) { /* Bad argument type */ return luaL_error(L, "bad format string type: %s", lua_typename(L, lua_type(L, start_pos))); } - ret = lua_logger_log_format(L, fmt_pos, is_string, - logbuf, sizeof(logbuf) - 1); + ret = lua_logger_log_format(L, start_pos, is_string, logbuf, sizeof(logbuf)); if (ret) { if (is_string) { @@ -849,12 +819,9 @@ lua_logger_do_log(lua_State *L, lua_common_log_line(level, L, logbuf, uid, "lua", 1); } } - else { - if (is_string) { - lua_pushnil(L); - - return 1; - } + else if (is_string) { + lua_pushnil(L); + return 1; } return 0; @@ -917,11 +884,11 @@ lua_logger_logx(lua_State *L) if 
(uid && modname) { if (lua_type(L, 4) == LUA_TSTRING) { - ret = lua_logger_log_format(L, 4, FALSE, logbuf, sizeof(logbuf) - 1); + ret = lua_logger_log_format(L, 4, FALSE, logbuf, sizeof(logbuf)); } else if (lua_type(L, 4) == LUA_TNUMBER) { stack_pos = lua_tonumber(L, 4); - ret = lua_logger_log_format(L, 5, FALSE, logbuf, sizeof(logbuf) - 1); + ret = lua_logger_log_format(L, 5, FALSE, logbuf, sizeof(logbuf)); } else { return luaL_error(L, "invalid argument on pos 4"); @@ -959,11 +926,11 @@ lua_logger_debugm(lua_State *L) if (uid && module) { if (lua_type(L, 3) == LUA_TSTRING) { - ret = lua_logger_log_format(L, 3, FALSE, logbuf, sizeof(logbuf) - 1); + ret = lua_logger_log_format(L, 3, FALSE, logbuf, sizeof(logbuf)); } else if (lua_type(L, 3) == LUA_TNUMBER) { stack_pos = lua_tonumber(L, 3); - ret = lua_logger_log_format(L, 4, FALSE, logbuf, sizeof(logbuf) - 1); + ret = lua_logger_log_format(L, 4, FALSE, logbuf, sizeof(logbuf)); } else { return luaL_error(L, "invalid argument on pos 3"); diff --git a/src/lua/lua_map.c b/src/lua/lua_map.c index 062613bd7..fa375cf63 100644 --- a/src/lua/lua_map.c +++ b/src/lua/lua_map.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -56,6 +56,20 @@ rspamd_config:register_symbol{ description = "A sample symbol", callback = sample_symbol_cb, } + +-- Callback map that processes lines one by one +local function process_line_cb(key, value, map) + -- This callback is called for each key-value pair in the map + rspamd_logger.infox('Got key %s with value %s', key, value) +end + +local callback_map = rspamd_config:add_map{ + type = "callback", + urls = ['file:///path/to/file'], + description = 'line by line map', + callback = process_line_cb, + by_line = true, -- Process map line by line instead of loading all data +} */ /*** @@ -156,6 +170,13 @@ LUA_FUNCTION_DEF(map, get_data_digest); */ LUA_FUNCTION_DEF(map, get_nelts); +/*** + * @method map:trigger_hyperscan_compilation() + * Trigger hyperscan compilation for regexp scopes that may have been updated by this map + * This should be called after map loading is complete for maps that update regexp scopes + */ +LUA_FUNCTION_DEF(map, trigger_hyperscan_compilation); + static const struct luaL_reg maplib_m[] = { LUA_INTERFACE_DEF(map, get_key), LUA_INTERFACE_DEF(map, is_signed), @@ -169,6 +190,7 @@ static const struct luaL_reg maplib_m[] = { LUA_INTERFACE_DEF(map, on_load), LUA_INTERFACE_DEF(map, get_data_digest), LUA_INTERFACE_DEF(map, get_nelts), + LUA_INTERFACE_DEF(map, trigger_hyperscan_compilation), {"__tostring", rspamd_lua_class_tostring}, {NULL, NULL}}; @@ -176,6 +198,7 @@ struct lua_map_callback_data { lua_State *L; int ref; gboolean opaque; + gboolean by_line; rspamd_fstring_t *data; struct rspamd_lua_map *lua_map; }; @@ -319,6 +342,11 @@ int lua_config_radix_from_ucl(lua_State *L) ucl_object_insert_key(fake_obj, ucl_object_fromstring("static"), "url", 0, false); + if (lua_type(L, 3) == LUA_TSTRING) { + ucl_object_insert_key(fake_obj, ucl_object_fromstring(lua_tostring(L, 3)), + "description", 0, false); + } + if ((m = rspamd_map_add_from_ucl(cfg, fake_obj, "static radix map", rspamd_radix_read, rspamd_radix_fin, @@ -428,6 +456,102 @@ int 
lua_config_add_kv_map(lua_State *L) } +static void +lua_map_line_insert(gpointer st, gconstpointer key, gconstpointer value) +{ + struct lua_map_callback_data *cbdata = st; + struct rspamd_lua_map **pmap; + struct rspamd_map *map = cbdata->lua_map->map; + + if (cbdata->ref == -1) { + msg_err_map("map has no callback set"); + return; + } + + lua_pushcfunction(cbdata->L, &rspamd_lua_traceback); + int err_idx = lua_gettop(cbdata->L); + + lua_rawgeti(cbdata->L, LUA_REGISTRYINDEX, cbdata->ref); + + /* Push key */ + if (!cbdata->opaque) { + lua_pushstring(cbdata->L, key); + lua_pushstring(cbdata->L, value); + } + else { + /* Key */ + lua_new_text(cbdata->L, key, strlen(key), 0); + + /* Value */ + lua_new_text(cbdata->L, value, strlen(value), 0); + } + + /* Push map object */ + pmap = lua_newuserdata(cbdata->L, sizeof(void *)); + *pmap = cbdata->lua_map; + rspamd_lua_setclass(cbdata->L, rspamd_map_classname, -1); + + int ret = lua_pcall(cbdata->L, 3, 0, err_idx); + + if (ret != 0) { + msg_info_map("call to line callback failed (%d): %s", ret, + lua_tostring(cbdata->L, -1)); + } + + lua_settop(cbdata->L, err_idx - 1); +} + +static char * +lua_map_line_read(char *chunk, int len, + struct map_cb_data *data, + gboolean final) +{ + struct lua_map_callback_data *cbdata, *old; + + if (data->cur_data == NULL) { + old = (struct lua_map_callback_data *) data->prev_data; + cbdata = old; + cbdata->L = old->L; + cbdata->ref = old->ref; + cbdata->lua_map = old->lua_map; + cbdata->by_line = old->by_line; + cbdata->opaque = old->opaque; + data->cur_data = cbdata; + data->prev_data = NULL; + } + else { + cbdata = (struct lua_map_callback_data *) data->cur_data; + } + + return rspamd_parse_kv_list(chunk, len, data, lua_map_line_insert, "", final); +} + +static void +lua_map_line_fin(struct map_cb_data *data, void **target) +{ + struct lua_map_callback_data *cbdata; + + if (data->errored) { + if (data->cur_data) { + cbdata = (struct lua_map_callback_data *) data->cur_data; + if (cbdata->ref 
!= -1) { + luaL_unref(cbdata->L, LUA_REGISTRYINDEX, cbdata->ref); + } + + data->cur_data = NULL; + } + } + else { + if (target) { + *target = data->cur_data; + } + + if (data->prev_data) { + data->prev_data = NULL; + } + } +} + static char * lua_map_read(char *chunk, int len, struct map_cb_data *data, @@ -441,6 +565,8 @@ lua_map_read(char *chunk, int len, cbdata->L = old->L; cbdata->ref = old->ref; cbdata->lua_map = old->lua_map; + cbdata->by_line = old->by_line; + cbdata->opaque = old->opaque; data->cur_data = cbdata; data->prev_data = NULL; } @@ -504,13 +630,7 @@ lua_map_fin(struct map_cb_data *data, void **target) lua_pushlstring(cbdata->L, cbdata->data->str, cbdata->data->len); } else { - struct rspamd_lua_text *t; - - t = lua_newuserdata(cbdata->L, sizeof(*t)); - rspamd_lua_setclass(cbdata->L, rspamd_text_classname, -1); - t->flags = 0; - t->len = cbdata->data->len; - t->start = cbdata->data->str; + lua_new_text(cbdata->L, cbdata->data->str, cbdata->data->len, 0); } pmap = lua_newuserdata(cbdata->L, sizeof(void *)); @@ -568,14 +688,15 @@ int lua_config_add_map(lua_State *L) struct rspamd_lua_map *map, **pmap; struct rspamd_map *m; gboolean opaque_data = FALSE; + gboolean by_line = FALSE; int cbidx = -1, ret; GError *err = NULL; if (cfg) { if (!rspamd_lua_parse_table_arguments(L, 2, &err, RSPAMD_LUA_PARSE_ARGUMENTS_DEFAULT, - "*url=O;description=S;callback=F;type=S;opaque_data=B", - &map_obj, &description, &cbidx, &type, &opaque_data)) { + "*url=O;description=S;callback=F;type=S;opaque_data=B;by_line=B", + &map_obj, &description, &cbidx, &type, &opaque_data, &by_line)) { ret = luaL_error(L, "invalid table arguments: %s", err->message); g_error_free(err); if (map_obj) { @@ -605,10 +726,11 @@ int lua_config_add_map(lua_State *L) cbdata->lua_map = map; cbdata->ref = cbidx; cbdata->opaque = opaque_data; + cbdata->by_line = by_line; if ((m = rspamd_map_add_from_ucl(cfg, map_obj, description, - lua_map_read, - lua_map_fin, + by_line ? 
lua_map_line_read : lua_map_read, + by_line ? lua_map_line_fin : lua_map_fin, lua_map_dtor, (void **) &map->data.cbdata, NULL, RSPAMD_MAP_DEFAULT)) == NULL) { @@ -1412,6 +1534,21 @@ lua_map_on_load(lua_State *L) return 0; } +static int +lua_map_trigger_hyperscan_compilation(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_map *map = lua_check_map(L, 1); + + if (map == NULL) { + return luaL_error(L, "invalid arguments"); + } + + rspamd_map_trigger_hyperscan_compilation(map->map); + + return 0; +} + void luaopen_map(lua_State *L) { rspamd_lua_new_class(L, rspamd_map_classname, maplib_m); diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c index 07dba9c93..982b10d90 100644 --- a/src/lua/lua_mimepart.c +++ b/src/lua/lua_mimepart.c @@ -901,7 +901,7 @@ lua_textpart_get_words_count(lua_State *L) return 1; } - if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) { + if (IS_TEXT_PART_EMPTY(part) || !part->utf_words.a) { lua_pushinteger(L, 0); } else { @@ -943,7 +943,7 @@ lua_textpart_get_words(lua_State *L) return luaL_error(L, "invalid arguments"); } - if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) { + if (IS_TEXT_PART_EMPTY(part) || !part->utf_words.a) { lua_createtable(L, 0, 0); } else { @@ -957,7 +957,7 @@ lua_textpart_get_words(lua_State *L) } } - return rspamd_lua_push_words(L, part->utf_words, how); + return rspamd_lua_push_words_kvec(L, &part->utf_words, how); } return 1; @@ -976,7 +976,7 @@ lua_textpart_filter_words(lua_State *L) return luaL_error(L, "invalid arguments"); } - if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) { + if (IS_TEXT_PART_EMPTY(part) || !part->utf_words.a) { lua_createtable(L, 0, 0); } else { @@ -998,9 +998,8 @@ lua_textpart_filter_words(lua_State *L) lua_createtable(L, 8, 0); - for (i = 0, cnt = 1; i < part->utf_words->len; i++) { - rspamd_stat_token_t *w = &g_array_index(part->utf_words, - rspamd_stat_token_t, i); + for (i = 0, cnt = 1; i < kv_size(part->utf_words); i++) { + rspamd_word_t *w = 
&kv_A(part->utf_words, i); switch (how) { case RSPAMD_LUA_WORDS_STEM: @@ -1194,13 +1193,13 @@ struct lua_shingle_filter_cbdata { rspamd_mempool_t *pool; }; -#define STORE_TOKEN(i, t) \ - do { \ - if ((i) < part->utf_words->len) { \ - word = &g_array_index(part->utf_words, rspamd_stat_token_t, (i)); \ - sd->t.begin = word->stemmed.begin; \ - sd->t.len = word->stemmed.len; \ - } \ +#define STORE_TOKEN(i, t) \ + do { \ + if ((i) < kv_size(part->utf_words)) { \ + word = &kv_A(part->utf_words, (i)); \ + sd->t.begin = word->stemmed.begin; \ + sd->t.len = word->stemmed.len; \ + } \ } while (0) static uint64_t @@ -1210,7 +1209,7 @@ lua_shingles_filter(uint64_t *input, gsize count, uint64_t minimal = G_MAXUINT64; gsize i, min_idx = 0; struct lua_shingle_data *sd; - rspamd_stat_token_t *word; + rspamd_word_t *word; struct lua_shingle_filter_cbdata *cbd = (struct lua_shingle_filter_cbdata *) ud; struct rspamd_mime_text_part *part; @@ -1248,7 +1247,7 @@ lua_textpart_get_fuzzy_hashes(lua_State *L) unsigned int i; struct lua_shingle_data *sd; rspamd_cryptobox_hash_state_t st; - rspamd_stat_token_t *word; + rspamd_word_t *word; struct lua_shingle_filter_cbdata cbd; @@ -1256,7 +1255,7 @@ lua_textpart_get_fuzzy_hashes(lua_State *L) return luaL_error(L, "invalid arguments"); } - if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) { + if (IS_TEXT_PART_EMPTY(part) || !part->utf_words.a) { lua_pushnil(L); lua_pushnil(L); } @@ -1269,8 +1268,8 @@ lua_textpart_get_fuzzy_hashes(lua_State *L) /* Calculate direct hash */ rspamd_cryptobox_hash_init(&st, key, rspamd_cryptobox_HASHKEYBYTES); - for (i = 0; i < part->utf_words->len; i++) { - word = &g_array_index(part->utf_words, rspamd_stat_token_t, i); + for (i = 0; i < kv_size(part->utf_words); i++) { + word = &kv_A(part->utf_words, i); rspamd_cryptobox_hash_update(&st, word->stemmed.begin, word->stemmed.len); } @@ -1283,7 +1282,7 @@ lua_textpart_get_fuzzy_hashes(lua_State *L) cbd.pool = pool; cbd.part = part; - sgl = 
rspamd_shingles_from_text(part->utf_words, key, + sgl = rspamd_shingles_from_text(&part->utf_words, key, pool, lua_shingles_filter, &cbd, RSPAMD_SHINGLES_MUMHASH); if (sgl == NULL) { diff --git a/src/lua/lua_parsers.c b/src/lua/lua_parsers.c index f77b36952..eb7fa6bf5 100644 --- a/src/lua/lua_parsers.c +++ b/src/lua/lua_parsers.c @@ -1,11 +1,11 @@ -/*- - * Copyright 2020 Vsevolod Stakhov +/* + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -46,6 +46,14 @@ */ /*** + * @function parsers.parse_html_content(input, mempool) + * Parses HTML and returns the HTML content object for structure analysis + * @param {string|text} in input HTML + * @param {rspamd_mempool} mempool memory pool for HTML content management + * @return {html_content} HTML content object with tag structure + */ +LUA_FUNCTION_DEF(parsers, parse_html_content); +/*** * @function parsers.parse_mail_address(str, [pool]) * Parses email address and returns a table of tables in the following format: * @@ -93,6 +101,7 @@ static const struct luaL_reg parserslib_f[] = { LUA_INTERFACE_DEF(parsers, tokenize_text), LUA_INTERFACE_DEF(parsers, parse_html), + LUA_INTERFACE_DEF(parsers, parse_html_content), LUA_INTERFACE_DEF(parsers, parse_mail_address), LUA_INTERFACE_DEF(parsers, parse_content_type), LUA_INTERFACE_DEF(parsers, parse_smtp_date), @@ -108,8 +117,8 @@ int lua_parsers_tokenize_text(lua_State *L) struct rspamd_lua_text *t; struct rspamd_process_exception *ex; UText utxt = UTEXT_INITIALIZER; - GArray *res; - rspamd_stat_token_t *w; + rspamd_words_t *res; + rspamd_word_t *w; if (lua_type(L, 1) == LUA_TSTRING) { in 
= luaL_checklstring(L, 1, &len); @@ -175,13 +184,15 @@ int lua_parsers_tokenize_text(lua_State *L) lua_pushnil(L); } else { - lua_createtable(L, res->len, 0); + lua_createtable(L, kv_size(*res), 0); - for (i = 0; i < res->len; i++) { - w = &g_array_index(res, rspamd_stat_token_t, i); + for (i = 0; i < kv_size(*res); i++) { + w = &kv_A(*res, i); lua_pushlstring(L, w->original.begin, w->original.len); lua_rawseti(L, -2, i + 1); } + kv_destroy(*res); + g_free(res); } cur = exceptions; @@ -240,6 +251,62 @@ int lua_parsers_parse_html(lua_State *L) return 1; } +static int lua_parsers_parse_html_content(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t; + const char *start = NULL; + gsize len; + GByteArray *in; + rspamd_mempool_t *pool; + void *hc; + void **phc; + + if (lua_type(L, 1) == LUA_TUSERDATA) { + t = lua_check_text(L, 1); + + if (t != NULL) { + start = t->start; + len = t->len; + } + } + else if (lua_type(L, 1) == LUA_TSTRING) { + start = luaL_checklstring(L, 1, &len); + } + + if (lua_type(L, 2) != LUA_TUSERDATA) { + return luaL_error(L, "invalid arguments: mempool expected as second argument"); + } + + pool = rspamd_lua_check_mempool(L, 2); + if (!pool) { + return luaL_error(L, "invalid mempool argument"); + } + + if (start != NULL) { + in = g_byte_array_sized_new(len); + g_byte_array_append(in, start, len); + + hc = rspamd_html_process_part(pool, in); + + if (hc) { + phc = lua_newuserdata(L, sizeof(void *)); + *phc = hc; + rspamd_lua_setclass(L, rspamd_html_classname, -1); + } + else { + lua_pushnil(L); + } + + g_byte_array_free(in, TRUE); + } + else { + lua_pushnil(L); + } + + return 1; +} + int lua_parsers_parse_mail_address(lua_State *L) { LUA_TRACE_POINT; @@ -407,4 +474,4 @@ lua_load_parsers(lua_State *L) void luaopen_parsers(lua_State *L) { rspamd_lua_add_preload(L, "rspamd_parsers", lua_load_parsers); -}
\ No newline at end of file +} diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 97f9c496e..0b1473b61 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -6943,7 +6943,7 @@ lua_task_get_meta_words(lua_State *L) return luaL_error(L, "invalid arguments"); } - if (task->meta_words == NULL) { + if (!task->meta_words.a) { lua_createtable(L, 0, 0); } else { @@ -6967,7 +6967,7 @@ lua_task_get_meta_words(lua_State *L) } } - return rspamd_lua_push_words(L, task->meta_words, how); + return rspamd_lua_push_words_kvec(L, &task->meta_words, how); } return 1; @@ -7039,6 +7039,76 @@ lua_lookup_words_array(lua_State *L, return nmatched; } +static unsigned int +lua_lookup_words_kvec(lua_State *L, + int cbpos, + struct rspamd_task *task, + struct rspamd_lua_map *map, + rspamd_words_t *words) +{ + rspamd_word_t *tok; + unsigned int i, nmatched = 0; + int err_idx; + gboolean matched; + const char *key; + gsize keylen; + + if (!words || !words->a) { + return 0; + } + + for (i = 0; i < kv_size(*words); i++) { + tok = &kv_A(*words, i); + + matched = FALSE; + + if (tok->normalized.len == 0) { + continue; + } + + key = tok->normalized.begin; + keylen = tok->normalized.len; + + switch (map->type) { + case RSPAMD_LUA_MAP_SET: + case RSPAMD_LUA_MAP_HASH: + /* We know that tok->normalized is zero terminated in fact */ + if (rspamd_match_hash_map(map->data.hash, key, keylen)) { + matched = TRUE; + } + break; + case RSPAMD_LUA_MAP_REGEXP: + case RSPAMD_LUA_MAP_REGEXP_MULTIPLE: + if (rspamd_match_regexp_map_single(map->data.re_map, key, + keylen)) { + matched = TRUE; + } + break; + default: + g_assert_not_reached(); + break; + } + + if (matched) { + nmatched++; + + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); + lua_pushvalue(L, cbpos); /* Function */ + rspamd_lua_push_full_word(L, tok); + + if (lua_pcall(L, 1, 0, err_idx) != 0) { + msg_err_task("cannot call callback function for lookup words: %s", + lua_tostring(L, -1)); + } + + lua_settop(L, err_idx - 
1); + } + } + + return nmatched; +} + static int lua_task_lookup_words(lua_State *L) { @@ -7062,13 +7132,13 @@ lua_task_lookup_words(lua_State *L) PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, tp) { - if (tp->utf_words) { - matches += lua_lookup_words_array(L, 3, task, map, tp->utf_words); + if (tp->utf_words.a) { + matches += lua_lookup_words_kvec(L, 3, task, map, &tp->utf_words); } } - if (task->meta_words) { - matches += lua_lookup_words_array(L, 3, task, map, task->meta_words); + if (task->meta_words.a) { + matches += lua_lookup_words_kvec(L, 3, task, map, &task->meta_words); } lua_pushinteger(L, matches); diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 9fe862757..f2e9b8fa9 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -23,12 +23,21 @@ #include "lua_parsers.h" -#ifdef WITH_LUA_REPL -#include "replxx.h" -#endif +#include "replxx.h" #include <math.h> #include <glob.h> +#include <sys/types.h> +#include <sys/time.h> +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) +#include <sys/sysctl.h> +#ifdef __FreeBSD__ +#include <sys/user.h> +#endif +#endif +#ifdef __APPLE__ +#include <mach/mach.h> +#endif #include "unicode/uspoof.h" #include "unicode/uscript.h" @@ -629,6 +638,27 @@ LUA_FUNCTION_DEF(util, caseless_hash_fast); LUA_FUNCTION_DEF(util, get_hostname); /*** + * @function util.get_uptime() + * Returns system uptime in seconds + * @return {number} uptime in seconds + */ +LUA_FUNCTION_DEF(util, get_uptime); + +/*** + * @function util.get_pid() + * Returns current process PID + * @return {number} process ID + */ +LUA_FUNCTION_DEF(util, get_pid); + +/*** + * @function util.get_memory_usage() + * Returns memory usage information for current process + * @return {table} memory usage info with 'rss' and 'vsize' fields in bytes + */ +LUA_FUNCTION_DEF(util, get_memory_usage); + +/*** * @function util.parse_content_type(ct_string, mempool) * Parses content-type string to a table: * - `type` @@ 
-730,6 +760,9 @@ static const struct luaL_reg utillib_f[] = { LUA_INTERFACE_DEF(util, umask), LUA_INTERFACE_DEF(util, isatty), LUA_INTERFACE_DEF(util, get_hostname), + LUA_INTERFACE_DEF(util, get_uptime), + LUA_INTERFACE_DEF(util, get_pid), + LUA_INTERFACE_DEF(util, get_memory_usage), LUA_INTERFACE_DEF(util, parse_content_type), LUA_INTERFACE_DEF(util, mime_header_encode), LUA_INTERFACE_DEF(util, pack), @@ -2416,6 +2449,107 @@ lua_util_get_hostname(lua_State *L) } static int +lua_util_get_uptime(lua_State *L) +{ + LUA_TRACE_POINT; + double uptime = 0.0; + +#ifdef __linux__ + FILE *f = fopen("/proc/uptime", "r"); + if (f) { + if (fscanf(f, "%lf", &uptime) != 1) { + uptime = 0.0; + } + fclose(f); + } +#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) + struct timeval boottime; + size_t len = sizeof(boottime); + int mib[2] = {CTL_KERN, KERN_BOOTTIME}; + + if (sysctl(mib, 2, &boottime, &len, NULL, 0) == 0) { + struct timeval now; + gettimeofday(&now, NULL); + uptime = (now.tv_sec - boottime.tv_sec) + + (now.tv_usec - boottime.tv_usec) / 1000000.0; + } +#endif + + lua_pushnumber(L, uptime); + return 1; +} + +static int +lua_util_get_pid(lua_State *L) +{ + LUA_TRACE_POINT; + lua_pushinteger(L, getpid()); + return 1; +} + +static int +lua_util_get_memory_usage(lua_State *L) +{ + LUA_TRACE_POINT; + lua_createtable(L, 0, 2); + +#ifdef __linux__ + FILE *f = fopen("/proc/self/status", "r"); + if (f) { + char line[256]; + long rss = 0, vsize = 0; + + while (fgets(line, sizeof(line), f)) { + if (sscanf(line, "VmRSS: %ld kB", &rss) == 1) { + rss *= 1024; /* Convert to bytes */ + } + else if (sscanf(line, "VmSize: %ld kB", &vsize) == 1) { + vsize *= 1024; /* Convert to bytes */ + } + } + fclose(f); + + lua_pushstring(L, "rss"); + lua_pushinteger(L, rss); + lua_settable(L, -3); + + lua_pushstring(L, "vsize"); + lua_pushinteger(L, vsize); + lua_settable(L, -3); + } +#elif defined(__APPLE__) + struct task_basic_info info; + 
mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT; + + if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t) &info, &count) == KERN_SUCCESS) { + lua_pushstring(L, "rss"); + lua_pushinteger(L, info.resident_size); + lua_settable(L, -3); + + lua_pushstring(L, "vsize"); + lua_pushinteger(L, info.virtual_size); + lua_settable(L, -3); + } +#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) + struct kinfo_proc kp; + size_t len = sizeof(kp); + int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, getpid()}; + + if (sysctl(mib, 4, &kp, &len, NULL, 0) == 0) { + lua_pushstring(L, "rss"); + lua_pushinteger(L, kp.ki_rssize * getpagesize()); + lua_settable(L, -3); + + lua_pushstring(L, "vsize"); + lua_pushinteger(L, kp.ki_size); + lua_settable(L, -3); + } +#endif + + return 1; +} + +static int lua_util_parse_content_type(lua_State *L) { return lua_parsers_parse_content_type(L); @@ -2510,7 +2644,7 @@ lua_util_readline(lua_State *L) if (lua_type(L, 1) == LUA_TSTRING) { prompt = lua_tostring(L, 1); } -#ifdef WITH_LUA_REPL + static Replxx *rx_instance = NULL; if (rx_instance == NULL) { @@ -2527,26 +2661,6 @@ lua_util_readline(lua_State *L) else { lua_pushnil(L); } -#else - size_t linecap = 0; - ssize_t linelen; - - fprintf(stdout, "%s ", prompt); - - linelen = getline(&input, &linecap, stdin); - - if (linelen > 0) { - if (input[linelen - 1] == '\n') { - linelen--; - } - - lua_pushlstring(L, input, linelen); - free(input); - } - else { - lua_pushnil(L); - } -#endif return 1; } @@ -3721,4 +3835,4 @@ lua_ev_base_add_timer(lua_State *L) ev_timer_start(ev_base, &cbdata->ev); return 0; -}
\ No newline at end of file +} |