diff options
Diffstat (limited to 'src/lua')
-rw-r--r-- | src/lua/lua_common.c | 52 | ||||
-rw-r--r-- | src/lua/lua_common.h | 7 | ||||
-rw-r--r-- | src/lua/lua_config.c | 92 | ||||
-rw-r--r-- | src/lua/lua_cryptobox.c | 2 | ||||
-rw-r--r-- | src/lua/lua_http.c | 123 | ||||
-rw-r--r-- | src/lua/lua_logger.c | 102 | ||||
-rw-r--r-- | src/lua/lua_map.c | 7 | ||||
-rw-r--r-- | src/lua/lua_mimepart.c | 39 | ||||
-rw-r--r-- | src/lua/lua_parsers.c | 18 | ||||
-rw-r--r-- | src/lua/lua_task.c | 82 | ||||
-rw-r--r-- | src/lua/lua_util.c | 164 |
11 files changed, 562 insertions, 126 deletions
diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index 3a0f1a06c..f36228680 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -2401,7 +2401,7 @@ rspamd_lua_try_load_redis(lua_State *L, const ucl_object_t *obj, return FALSE; } -void rspamd_lua_push_full_word(lua_State *L, rspamd_stat_token_t *w) +void rspamd_lua_push_full_word(lua_State *L, rspamd_word_t *w) { int fl_cnt; @@ -2521,6 +2521,54 @@ int rspamd_lua_push_words(lua_State *L, GArray *words, return 1; } +int rspamd_lua_push_words_kvec(lua_State *L, rspamd_words_t *words, + enum rspamd_lua_words_type how) +{ + rspamd_word_t *w; + unsigned int i, cnt; + + if (!words || !words->a) { + lua_createtable(L, 0, 0); + return 1; + } + + lua_createtable(L, kv_size(*words), 0); + + for (i = 0, cnt = 1; i < kv_size(*words); i++) { + w = &kv_A(*words, i); + + switch (how) { + case RSPAMD_LUA_WORDS_STEM: + if (w->stemmed.len > 0) { + lua_pushlstring(L, w->stemmed.begin, w->stemmed.len); + lua_rawseti(L, -2, cnt++); + } + break; + case RSPAMD_LUA_WORDS_NORM: + if (w->normalized.len > 0) { + lua_pushlstring(L, w->normalized.begin, w->normalized.len); + lua_rawseti(L, -2, cnt++); + } + break; + case RSPAMD_LUA_WORDS_RAW: + if (w->original.len > 0) { + lua_pushlstring(L, w->original.begin, w->original.len); + lua_rawseti(L, -2, cnt++); + } + break; + case RSPAMD_LUA_WORDS_FULL: + rspamd_lua_push_full_word(L, w); + /* Push to the resulting vector */ + lua_rawseti(L, -2, cnt++); + break; + default: + break; + } + } + + return 1; +} + char * rspamd_lua_get_module_name(lua_State *L) { @@ -2658,4 +2706,4 @@ int rspamd_lua_geti(lua_State *L, int pos, int i) return lua_type(L, -1); } -#endif
\ No newline at end of file +#endif diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h index 5819da8cb..d494f0923 100644 --- a/src/lua/lua_common.h +++ b/src/lua/lua_common.h @@ -539,7 +539,7 @@ enum lua_logger_escape_type { * @return */ gsize lua_logger_out(lua_State *L, int pos, char *outbuf, gsize len, - enum lua_logger_escape_type esc_type); + enum lua_logger_escape_type esc_type); /** * Safely checks userdata to match specified class @@ -632,7 +632,7 @@ struct rspamd_stat_token_s; * @param L * @param word */ -void rspamd_lua_push_full_word(lua_State *L, struct rspamd_stat_token_s *word); +void rspamd_lua_push_full_word(lua_State *L, rspamd_word_t *word); enum rspamd_lua_words_type { RSPAMD_LUA_WORDS_STEM = 0, @@ -651,6 +651,9 @@ enum rspamd_lua_words_type { int rspamd_lua_push_words(lua_State *L, GArray *words, enum rspamd_lua_words_type how); +int rspamd_lua_push_words_kvec(lua_State *L, rspamd_words_t *words, + enum rspamd_lua_words_type how); + /** * Returns newly allocated name for caller module name * @param L diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c index 07ed58ad5..7b3a156cd 100644 --- a/src/lua/lua_config.c +++ b/src/lua/lua_config.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,10 @@ #include "utlist.h" #include <math.h> +/* Forward declarations for custom tokenizer functions */ +gboolean rspamd_config_load_custom_tokenizers(struct rspamd_config *cfg, GError **err); +void rspamd_config_unload_custom_tokenizers(struct rspamd_config *cfg); + /*** * This module is used to configure rspamd and is normally available as global * variable named `rspamd_config`. Unlike other modules, it is not necessary to @@ -118,7 +122,7 @@ local function foo(task) end */ /*** -* @method rspamd_config:radix_from_ucl(obj) +* @method rspamd_config:radix_from_ucl(obj, description) * Creates new embedded map of IP/mask addresses from object. * @param {ucl} obj object * @return {map} radix tree object @@ -862,6 +866,19 @@ LUA_FUNCTION_DEF(config, get_dns_max_requests); */ LUA_FUNCTION_DEF(config, get_dns_timeout); +/*** + * @method rspamd_config:load_custom_tokenizers() + * Loads custom tokenizers from configuration + * @return {boolean} true if successful + */ +LUA_FUNCTION_DEF(config, load_custom_tokenizers); + +/*** + * @method rspamd_config:unload_custom_tokenizers() + * Unloads custom tokenizers and frees memory + */ +LUA_FUNCTION_DEF(config, unload_custom_tokenizers); + static const struct luaL_reg configlib_m[] = { LUA_INTERFACE_DEF(config, get_module_opt), LUA_INTERFACE_DEF(config, get_mempool), @@ -937,6 +954,8 @@ static const struct luaL_reg configlib_m[] = { LUA_INTERFACE_DEF(config, get_tld_path), LUA_INTERFACE_DEF(config, get_dns_max_requests), LUA_INTERFACE_DEF(config, get_dns_timeout), + LUA_INTERFACE_DEF(config, load_custom_tokenizers), + LUA_INTERFACE_DEF(config, unload_custom_tokenizers), {"__tostring", rspamd_lua_class_tostring}, {"__newindex", lua_config_newindex}, {NULL, NULL}}; @@ -4485,11 +4504,14 @@ lua_config_init_subsystem(lua_State *L) nparts = g_strv_length(parts); for (i = 0; i < nparts; i++) { - if (strcmp(parts[i], "filters") == 0) { + const char *str = parts[i]; + + /* TODO: total shit, rework some day */ + if (strcmp(str, "filters") == 0) { rspamd_lua_post_load_config(cfg); rspamd_init_filters(cfg, false, false); } - else if (strcmp(parts[i], "langdet") == 0) { + else if (strcmp(str, "langdet") == 0) { if (!cfg->lang_det) { cfg->lang_det = rspamd_language_detector_init(cfg); rspamd_mempool_add_destructor(cfg->cfg_pool, @@ -4497,10 +4519,10 @@ lua_config_init_subsystem(lua_State *L) cfg->lang_det); } } - else if (strcmp(parts[i], "stat") == 0) { + else if (strcmp(str, "stat") == 0) { rspamd_stat_init(cfg, NULL); } - else if (strcmp(parts[i], "dns") == 0) { + else if (strcmp(str, "dns") == 0) { struct ev_loop *ev_base = lua_check_ev_base(L, 3); if (ev_base) { @@ -4514,11 +4536,25 @@ lua_config_init_subsystem(lua_State *L) return luaL_error(L, "no event base specified"); } } - else if (strcmp(parts[i], "symcache") == 0) { + else if (strcmp(str, "symcache") == 0) { rspamd_symcache_init(cfg->cache); } + else if (strcmp(str, "tokenizers") == 0 || strcmp(str, "custom_tokenizers") == 0) { + GError *err = NULL; + if (!rspamd_config_load_custom_tokenizers(cfg, &err)) { + g_strfreev(parts); + if (err) { + int ret = luaL_error(L, "failed to load custom tokenizers: %s", err->message); + g_error_free(err); + return ret; + } + else { + return luaL_error(L, "failed to load custom tokenizers"); + } + } + } else { - int ret = luaL_error(L, "invalid param: %s", parts[i]); + int ret = luaL_error(L, "invalid param: %s", str); g_strfreev(parts); return ret; @@ -4772,3 +4808,43 @@ void lua_call_finish_script(struct rspamd_config_cfg_lua_script *sc, lua_thread_call(thread, 1); } + +static int +lua_config_load_custom_tokenizers(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + + if (cfg != NULL) { + GError *err = NULL; + gboolean ret = rspamd_config_load_custom_tokenizers(cfg, &err); + + if (!ret && err) { + lua_pushboolean(L, FALSE); + lua_pushstring(L, err->message); + g_error_free(err); + return 2; + } + + lua_pushboolean(L, ret); + return 1; + } + else { + return luaL_error(L, "invalid arguments"); + } +} + +static int +lua_config_unload_custom_tokenizers(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + + if (cfg != NULL) { + rspamd_config_unload_custom_tokenizers(cfg); + return 0; + } + else { + return luaL_error(L, "invalid arguments"); + } +} diff --git a/src/lua/lua_cryptobox.c b/src/lua/lua_cryptobox.c index 721d71256..2c2254920 100644 --- a/src/lua/lua_cryptobox.c +++ b/src/lua/lua_cryptobox.c @@ -404,7 +404,7 @@ lua_cryptobox_keypair_load(lua_State *L) if (lua_type(L, 1) == LUA_TSTRING) { buf = luaL_checklstring(L, 1, &len); if (buf != NULL) { - parser = ucl_parser_new(0); + parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS); if (!ucl_parser_add_chunk(parser, buf, len)) { msg_err("cannot open keypair from data: %s", diff --git a/src/lua/lua_http.c b/src/lua/lua_http.c index 7e9e7b1df..731b8b057 100644 --- a/src/lua/lua_http.c +++ b/src/lua/lua_http.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,22 +29,123 @@ * This module hides all complexity: DNS resolving, sessions management, zero-copy * text transfers and so on under the hood. * @example +-- Basic GET request with callback local rspamd_http = require "rspamd_http" local function symbol_callback(task) local function http_callback(err_message, code, body, headers) task:insert_result('SYMBOL', 1) -- task is available via closure + + if err_message then + -- Handle error + return + end + + -- Process response + if code == 200 then + -- Process body and headers + for name, value in pairs(headers) do + -- Headers are lowercase + end + end end - rspamd_http.request({ - task=task, - url='http://example.com/data', - body=task:get_content(), - callback=http_callback, - headers={Header='Value', OtherHeader='Value'}, - mime_type='text/plain', - }) - end + rspamd_http.request({ + task=task, + url='http://example.com/data', + body=task:get_content(), + callback=http_callback, + headers={Header='Value', OtherHeader='Value', DuplicatedHeader={'Multiple', 'Values'}}, + mime_type='text/plain', + }) +end + +-- POST request with JSON body +local function post_json_example(task) + local ucl = require "ucl" + local data = { + id = task:get_queue_id(), + sender = task:get_from()[1].addr + } + + local json_data = ucl.to_json(data) + + rspamd_http.request({ + task = task, + url = "http://example.com/api/submit", + method = "POST", + body = json_data, + headers = {['Content-Type'] = 'application/json'}, + callback = function(err, code, body, headers) + if not err and code == 200 then + -- Success + end + end + }) +end + +-- Synchronous HTTP request (using coroutines) +local function sync_http_example(task) + -- No callback makes this a synchronous call + local err, response = rspamd_http.request({ + task = task, + url = "http://example.com/api/data", + method = "GET", + timeout = 10.0 + }) + + if not err then + -- Response is a table with code, content, and headers + if response.code == 200 then + -- Process response.content + return true + end + end + return false +end + +-- Using authentication +local function auth_example(task) + rspamd_http.request({ + task = task, + url = "https://example.com/api/protected", + method = "GET", + user = "username", + password = "secret", + callback = function(err, code, body, headers) + -- Process authenticated response + end + }) +end + +-- Using HTTPS with SSL options +local function https_example(task) + rspamd_http.request({ + task = task, + url = "https://example.com/api/secure", + method = "GET", + no_ssl_verify = false, -- Verify SSL (default) + callback = function(err, code, body, headers) + -- Process secure response + end + }) +end + +-- Using keep-alive and gzip +local function advanced_example(task) + rspamd_http.request({ + task = task, + url = "http://example.com/api/data", + method = "POST", + body = task:get_content(), + gzip = true, -- Compress request body + keepalive = true, -- Use keep-alive connection + max_size = 1024 * 1024, -- Limit response to 1MB + callback = function(err, code, body, headers) + -- Process response + end + }) +end */ #define MAX_HEADERS_SIZE 8192 @@ -602,7 +703,7 @@ lua_http_push_headers(lua_State *L, struct rspamd_http_message *msg) * @param {string} url specifies URL for a request in the standard URI form (e.g. 'http://example.com/path') * @param {function} callback specifies callback function in format `function (err_message, code, body, headers)` that is called on HTTP request completion. if this parameter is missing, the function performs "pseudo-synchronous" call (see [Synchronous and Asynchronous API overview](/doc/developers/sync_async.html#API-example-http-module) * @param {task} task if called from symbol handler it is generally a good idea to use the common task objects: event base, DNS resolver and events session - * @param {table} headers optional headers in form `[name='value', name='value']` + * @param {table} headers optional headers in form `[name='value']` or `[name=['value1', 'value2']]` to duplicate a header with multiple values * @param {string} mime_type MIME type of the HTTP content (for example, `text/html`) * @param {string/text} body full body content, can be opaque `rspamd{text}` to avoid data copying * @param {number} timeout floating point request timeout value in seconds (default is 5.0 seconds) diff --git a/src/lua/lua_logger.c b/src/lua/lua_logger.c index 8f2aa5be1..04ff81b6d 100644 --- a/src/lua/lua_logger.c +++ b/src/lua/lua_logger.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -176,8 +176,8 @@ static const struct luaL_reg loggerlib_f[] = { static gsize lua_logger_out_type(lua_State *L, int pos, char *outbuf, - gsize len, struct lua_logger_trace *trace, - enum lua_logger_escape_type esc_type); + gsize len, struct lua_logger_trace *trace, + enum lua_logger_escape_type esc_type); static void lua_common_log_line(GLogLevelFlags level, @@ -215,12 +215,12 @@ lua_common_log_line(GLogLevelFlags level, } rspamd_common_log_function(NULL, - level, - module, - uid, - p, - "%s", - msg); + level, + module, + uid, + p, + "%s", + msg); } /*** Logger interface ***/ @@ -280,19 +280,22 @@ lua_logger_char_safe(int t, unsigned int esc_type) return true; } -/* Could return negative value in case of wrong argument number */ +#define LUA_MAX_ARGS 32 +/* Gracefully handles argument mismatches by substituting missing args and noting extra args */ static glong lua_logger_log_format_str(lua_State *L, int offset, char *logbuf, gsize remain, - const char *fmt, - enum lua_logger_escape_type esc_type) + const char *fmt, + enum lua_logger_escape_type esc_type) { const char *c; gsize r; int digit; - char *d = logbuf; unsigned int arg_num, cur_arg = 0, arg_max = lua_gettop(L) - offset; + gboolean args_used[LUA_MAX_ARGS]; + unsigned int used_args_count = 0; + memset(args_used, 0, sizeof(args_used)); while (remain > 1 && *fmt) { if (*fmt == '%') { ++fmt; @@ -300,12 +303,13 @@ lua_logger_log_format_str(lua_State *L, int offset, char *logbuf, gsize remain, if (*fmt == 's') { ++fmt; ++cur_arg; - } else { + } + else { arg_num = 0; while ((digit = g_ascii_digit_value(*fmt)) >= 0) { ++fmt; arg_num = arg_num * 10 + digit; - if (arg_num >= 100) { + if (arg_num >= LUA_MAX_ARGS) { /* Avoid ridiculously large numbers */ fmt = c; break; @@ -320,11 +324,19 @@ lua_logger_log_format_str(lua_State *L, int offset, char *logbuf, gsize remain, if (fmt > c) { if (cur_arg < 1 || cur_arg > arg_max) { - *d = 0; - return -((glong) cur_arg + 1); /* wrong argument number */ + /* Missing argument - substitute placeholder */ + r = rspamd_snprintf(d, remain, "<MISSING ARGUMENT>"); + } + else { + /* Valid argument - output it */ + r = lua_logger_out(L, offset + cur_arg, d, remain, esc_type); + /* Track which arguments are used */ + if (cur_arg <= LUA_MAX_ARGS && !args_used[cur_arg - 1]) { + args_used[cur_arg - 1] = TRUE; + used_args_count++; + } } - r = lua_logger_out(L, offset + cur_arg, d, remain, esc_type); g_assert(r < remain); remain -= r; d += r; @@ -339,11 +351,21 @@ lua_logger_log_format_str(lua_State *L, int offset, char *logbuf, gsize remain, --remain; } + /* Check for extra arguments and append warning if any */ + if (used_args_count > 0 && used_args_count < arg_max && remain > 1) { + unsigned int extra_args = arg_max - used_args_count; + r = rspamd_snprintf(d, remain, " <EXTRA %d ARGUMENTS>", (int) extra_args); + remain -= r; + d += r; + } + *d = 0; return d - logbuf; } +#undef LUA_MAX_ARGS + static gsize lua_logger_out_str(lua_State *L, int pos, char *outbuf, gsize len, @@ -486,12 +508,12 @@ lua_logger_out_userdata(lua_State *L, int pos, char *outbuf, gsize len) return r; } -#define MOVE_BUF(d, remain, r) \ - (d) += (r); \ - (remain) -= (r); \ - if ((remain) <= 1) { \ - lua_settop(L, top); \ - goto table_oob; \ +#define MOVE_BUF(d, remain, r) \ + (d) += (r); \ + (remain) -= (r); \ + if ((remain) <= 1) { \ + lua_settop(L, top); \ + goto table_oob; \ } static gsize @@ -545,9 +567,10 @@ lua_logger_out_table(lua_State *L, int pos, char *outbuf, gsize len, if (first) { first = FALSE; - str = "[%d] = "; - } else { - str = ", [%d] = "; + str = "[%d] = "; + } + else { + str = ", [%d] = "; } r = rspamd_snprintf(d, remain, str, i); MOVE_BUF(d, remain, r); @@ -579,14 +602,12 @@ lua_logger_out_table(lua_State *L, int pos, char *outbuf, gsize len, if (first) { first = FALSE; str = "[%2] = %1"; - } else { + } + else { str = ", [%2] = %1"; } r = lua_logger_log_format_str(L, top + 1, d, remain, str, esc_type); - if (r < 0) { - /* should not happen */ - goto table_oob; - } + /* lua_logger_log_format_str now handles errors gracefully */ MOVE_BUF(d, remain, r); /* Remove key */ @@ -606,9 +627,9 @@ table_oob: static gsize lua_logger_out_type(lua_State *L, int pos, - char *outbuf, gsize len, - struct lua_logger_trace *trace, - enum lua_logger_escape_type esc_type) + char *outbuf, gsize len, + struct lua_logger_trace *trace, + enum lua_logger_escape_type esc_type) { if (len == 0) { return 0; @@ -640,8 +661,8 @@ lua_logger_out_type(lua_State *L, int pos, } gsize lua_logger_out(lua_State *L, int pos, - char *outbuf, gsize len, - enum lua_logger_escape_type esc_type) + char *outbuf, gsize len, + enum lua_logger_escape_type esc_type) { struct lua_logger_trace tr; memset(&tr, 0, sizeof(tr)); @@ -747,11 +768,8 @@ lua_logger_log_format(lua_State *L, int fmt_pos, gboolean is_string, return FALSE; } - glong ret = lua_logger_log_format_str(L, fmt_pos, logbuf, remain, fmt, is_string ? LUA_ESCAPE_UNPRINTABLE : LUA_ESCAPE_LOG); - if (ret < 0) { - msg_err("wrong argument number: %ud", -((int) ret + 1)); - return FALSE; - } + /* lua_logger_log_format_str now handles argument mismatches gracefully */ + lua_logger_log_format_str(L, fmt_pos, logbuf, remain, fmt, is_string ? LUA_ESCAPE_UNPRINTABLE : LUA_ESCAPE_LOG); return TRUE; } diff --git a/src/lua/lua_map.c b/src/lua/lua_map.c index 062613bd7..5f55ece06 100644 --- a/src/lua/lua_map.c +++ b/src/lua/lua_map.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -319,6 +319,11 @@ int lua_config_radix_from_ucl(lua_State *L) ucl_object_insert_key(fake_obj, ucl_object_fromstring("static"), "url", 0, false); + if (lua_type(L, 3) == LUA_TSTRING) { + ucl_object_insert_key(fake_obj, ucl_object_fromstring(lua_tostring(L, 3)), + "description", 0, false); + } + if ((m = rspamd_map_add_from_ucl(cfg, fake_obj, "static radix map", rspamd_radix_read, rspamd_radix_fin, diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c index 07dba9c93..982b10d90 100644 --- a/src/lua/lua_mimepart.c +++ b/src/lua/lua_mimepart.c @@ -901,7 +901,7 @@ lua_textpart_get_words_count(lua_State *L) return 1; } - if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) { + if (IS_TEXT_PART_EMPTY(part) || !part->utf_words.a) { lua_pushinteger(L, 0); } else { @@ -943,7 +943,7 @@ lua_textpart_get_words(lua_State *L) return luaL_error(L, "invalid arguments"); } - if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) { + if (IS_TEXT_PART_EMPTY(part) || !part->utf_words.a) { lua_createtable(L, 0, 0); } else { @@ -957,7 +957,7 @@ lua_textpart_get_words(lua_State *L) } } - return rspamd_lua_push_words(L, part->utf_words, how); + return rspamd_lua_push_words_kvec(L, &part->utf_words, how); } return 1; @@ -976,7 +976,7 @@ lua_textpart_filter_words(lua_State *L) return luaL_error(L, "invalid arguments"); } - if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) { + if (IS_TEXT_PART_EMPTY(part) || !part->utf_words.a) { lua_createtable(L, 0, 0); } else { @@ -998,9 +998,8 @@ lua_textpart_filter_words(lua_State *L) lua_createtable(L, 8, 0); - for (i = 0, cnt = 1; i < part->utf_words->len; i++) { - rspamd_stat_token_t *w = &g_array_index(part->utf_words, - rspamd_stat_token_t, i); + for (i = 0, cnt = 1; i < kv_size(part->utf_words); i++) { + rspamd_word_t *w = &kv_A(part->utf_words, i); switch (how) { case RSPAMD_LUA_WORDS_STEM: @@ -1194,13 +1193,13 @@ struct lua_shingle_filter_cbdata { rspamd_mempool_t *pool; }; -#define STORE_TOKEN(i, t) \ - do { \ - if ((i) < part->utf_words->len) { \ - word = &g_array_index(part->utf_words, rspamd_stat_token_t, (i)); \ - sd->t.begin = word->stemmed.begin; \ - sd->t.len = word->stemmed.len; \ - } \ +#define STORE_TOKEN(i, t) \ + do { \ + if ((i) < kv_size(part->utf_words)) { \ + word = &kv_A(part->utf_words, (i)); \ + sd->t.begin = word->stemmed.begin; \ + sd->t.len = word->stemmed.len; \ + } \ } while (0) static uint64_t @@ -1210,7 +1209,7 @@ lua_shingles_filter(uint64_t *input, gsize count, uint64_t minimal = G_MAXUINT64; gsize i, min_idx = 0; struct lua_shingle_data *sd; - rspamd_stat_token_t *word; + rspamd_word_t *word; struct lua_shingle_filter_cbdata *cbd = (struct lua_shingle_filter_cbdata *) ud; struct rspamd_mime_text_part *part; @@ -1248,7 +1247,7 @@ lua_textpart_get_fuzzy_hashes(lua_State *L) unsigned int i; struct lua_shingle_data *sd; rspamd_cryptobox_hash_state_t st; - rspamd_stat_token_t *word; + rspamd_word_t *word; struct lua_shingle_filter_cbdata cbd; @@ -1256,7 +1255,7 @@ lua_textpart_get_fuzzy_hashes(lua_State *L) return luaL_error(L, "invalid arguments"); } - if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) { + if (IS_TEXT_PART_EMPTY(part) || !part->utf_words.a) { lua_pushnil(L); lua_pushnil(L); } @@ -1269,8 +1268,8 @@ lua_textpart_get_fuzzy_hashes(lua_State *L) /* Calculate direct hash */ rspamd_cryptobox_hash_init(&st, key, rspamd_cryptobox_HASHKEYBYTES); - for (i = 0; i < part->utf_words->len; i++) { - word = &g_array_index(part->utf_words, rspamd_stat_token_t, i); + for (i = 0; i < kv_size(part->utf_words); i++) { + word = &kv_A(part->utf_words, i); rspamd_cryptobox_hash_update(&st, word->stemmed.begin, word->stemmed.len); } @@ -1283,7 +1282,7 @@ lua_textpart_get_fuzzy_hashes(lua_State *L) cbd.pool = pool; cbd.part = part; - sgl = rspamd_shingles_from_text(part->utf_words, key, + sgl = rspamd_shingles_from_text(&part->utf_words, key, pool, lua_shingles_filter, &cbd, RSPAMD_SHINGLES_MUMHASH); if (sgl == NULL) { diff --git a/src/lua/lua_parsers.c b/src/lua/lua_parsers.c index f77b36952..39e1b0317 100644 --- a/src/lua/lua_parsers.c +++ b/src/lua/lua_parsers.c @@ -1,11 +1,11 @@ -/*- - * Copyright 2020 Vsevolod Stakhov +/* + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -108,8 +108,8 @@ int lua_parsers_tokenize_text(lua_State *L) struct rspamd_lua_text *t; struct rspamd_process_exception *ex; UText utxt = UTEXT_INITIALIZER; - GArray *res; - rspamd_stat_token_t *w; + rspamd_words_t *res; + rspamd_word_t *w; if (lua_type(L, 1) == LUA_TSTRING) { in = luaL_checklstring(L, 1, &len); @@ -175,13 +175,15 @@ int lua_parsers_tokenize_text(lua_State *L) lua_pushnil(L); } else { - lua_createtable(L, res->len, 0); + lua_createtable(L, kv_size(*res), 0); - for (i = 0; i < res->len; i++) { - w = &g_array_index(res, rspamd_stat_token_t, i); + for (i = 0; i < kv_size(*res); i++) { + w = &kv_A(*res, i); lua_pushlstring(L, w->original.begin, w->original.len); lua_rawseti(L, -2, i + 1); } + kv_destroy(*res); + g_free(res); } cur = exceptions; diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 97f9c496e..0b1473b61 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -6943,7 +6943,7 @@ lua_task_get_meta_words(lua_State *L) return luaL_error(L, "invalid arguments"); } - if (task->meta_words == NULL) { + if (!task->meta_words.a) { lua_createtable(L, 0, 0); } else { @@ -6967,7 +6967,7 @@ lua_task_get_meta_words(lua_State *L) } } - return rspamd_lua_push_words(L, task->meta_words, how); + return rspamd_lua_push_words_kvec(L, &task->meta_words, how); } return 1; @@ -7039,6 +7039,76 @@ lua_lookup_words_array(lua_State *L, return nmatched; } +static unsigned int +lua_lookup_words_kvec(lua_State *L, + int cbpos, + struct rspamd_task *task, + struct rspamd_lua_map *map, + rspamd_words_t *words) +{ + rspamd_word_t *tok; + unsigned int i, nmatched = 0; + int err_idx; + gboolean matched; + const char *key; + gsize keylen; + + if (!words || !words->a) { + return 0; + } + + for (i = 0; i < kv_size(*words); i++) { + tok = &kv_A(*words, i); + + matched = FALSE; + + if (tok->normalized.len == 0) { + continue; + } + + key = tok->normalized.begin; + keylen = tok->normalized.len; + + switch (map->type) { + case RSPAMD_LUA_MAP_SET: + case RSPAMD_LUA_MAP_HASH: + /* We know that tok->normalized is zero terminated in fact */ + if (rspamd_match_hash_map(map->data.hash, key, keylen)) { + matched = TRUE; + } + break; + case RSPAMD_LUA_MAP_REGEXP: + case RSPAMD_LUA_MAP_REGEXP_MULTIPLE: + if (rspamd_match_regexp_map_single(map->data.re_map, key, + keylen)) { + matched = TRUE; + } + break; + default: + g_assert_not_reached(); + break; + } + + if (matched) { + nmatched++; + + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); + lua_pushvalue(L, cbpos); /* Function */ + rspamd_lua_push_full_word(L, tok); + + if (lua_pcall(L, 1, 0, err_idx) != 0) { + msg_err_task("cannot call callback function for lookup words: %s", + lua_tostring(L, -1)); + } + + lua_settop(L, err_idx - 1); + } + } + + return nmatched; +} + static int lua_task_lookup_words(lua_State *L) { @@ -7062,13 +7132,13 @@ lua_task_lookup_words(lua_State *L) PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, tp) { - if (tp->utf_words) { - matches += lua_lookup_words_array(L, 3, task, map, tp->utf_words); + if (tp->utf_words.a) { + matches += lua_lookup_words_kvec(L, 3, task, map, &tp->utf_words); } } - if (task->meta_words) { - matches += lua_lookup_words_array(L, 3, task, map, task->meta_words); + if (task->meta_words.a) { + matches += lua_lookup_words_kvec(L, 3, task, map, &task->meta_words); } lua_pushinteger(L, matches); diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 9fe862757..f2e9b8fa9 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -23,12 +23,21 @@ #include "lua_parsers.h" -#ifdef WITH_LUA_REPL -#include "replxx.h" -#endif +#include "replxx.h" #include <math.h> #include <glob.h> +#include <sys/types.h> +#include <sys/time.h> +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) +#include <sys/sysctl.h> +#ifdef __FreeBSD__ +#include <sys/user.h> +#endif +#endif +#ifdef __APPLE__ +#include <mach/mach.h> +#endif #include "unicode/uspoof.h" #include "unicode/uscript.h" @@ -629,6 +638,27 @@ LUA_FUNCTION_DEF(util, caseless_hash_fast); LUA_FUNCTION_DEF(util, get_hostname); /*** + * @function util.get_uptime() + * Returns system uptime in seconds + * @return {number} uptime in seconds + */ +LUA_FUNCTION_DEF(util, get_uptime); + +/*** + * @function util.get_pid() + * Returns current process PID + * @return {number} process ID + */ +LUA_FUNCTION_DEF(util, get_pid); + +/*** + * @function util.get_memory_usage() + * Returns memory usage information for current process + * @return {table} memory usage info with 'rss' and 'vsize' fields in bytes + */ +LUA_FUNCTION_DEF(util, get_memory_usage); + +/*** * @function util.parse_content_type(ct_string, mempool) * Parses content-type string to a table: * - `type` @@ -730,6 +760,9 @@ static const struct luaL_reg utillib_f[] = { LUA_INTERFACE_DEF(util, umask), LUA_INTERFACE_DEF(util, isatty), LUA_INTERFACE_DEF(util, get_hostname), + LUA_INTERFACE_DEF(util, get_uptime), + LUA_INTERFACE_DEF(util, get_pid), + LUA_INTERFACE_DEF(util, get_memory_usage), LUA_INTERFACE_DEF(util, parse_content_type), LUA_INTERFACE_DEF(util, mime_header_encode), LUA_INTERFACE_DEF(util, pack), @@ -2416,6 +2449,107 @@ lua_util_get_hostname(lua_State *L) } static int +lua_util_get_uptime(lua_State *L) +{ + LUA_TRACE_POINT; + double uptime = 0.0; + +#ifdef __linux__ + FILE *f = fopen("/proc/uptime", "r"); + if (f) { + if (fscanf(f, "%lf", &uptime) != 1) { + uptime = 0.0; + } + fclose(f); + } +#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) + struct timeval boottime; + size_t len = sizeof(boottime); + int mib[2] = {CTL_KERN, KERN_BOOTTIME}; + + if (sysctl(mib, 2, &boottime, &len, NULL, 0) == 0) { + struct timeval now; + gettimeofday(&now, NULL); + uptime = (now.tv_sec - boottime.tv_sec) + + (now.tv_usec - boottime.tv_usec) / 1000000.0; + } +#endif + + lua_pushnumber(L, uptime); + return 1; +} + +static int +lua_util_get_pid(lua_State *L) +{ + LUA_TRACE_POINT; + lua_pushinteger(L, getpid()); + return 1; +} + +static int +lua_util_get_memory_usage(lua_State *L) +{ + LUA_TRACE_POINT; + lua_createtable(L, 0, 2); + +#ifdef __linux__ + FILE *f = fopen("/proc/self/status", "r"); + if (f) { + char line[256]; + long rss = 0, vsize = 0; + + while (fgets(line, sizeof(line), f)) { + if (sscanf(line, "VmRSS: %ld kB", &rss) == 1) { + rss *= 1024; /* Convert to bytes */ + } + else if (sscanf(line, "VmSize: %ld kB", &vsize) == 1) { + vsize *= 1024; /* Convert to bytes */ + } + } + fclose(f); + + lua_pushstring(L, "rss"); + lua_pushinteger(L, rss); + lua_settable(L, -3); + + lua_pushstring(L, "vsize"); + lua_pushinteger(L, vsize); + lua_settable(L, -3); + } +#elif defined(__APPLE__) + struct task_basic_info info; + mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT; + + if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t) &info, &count) == KERN_SUCCESS) { + lua_pushstring(L, "rss"); + lua_pushinteger(L, info.resident_size); + lua_settable(L, -3); + + lua_pushstring(L, "vsize"); + lua_pushinteger(L, info.virtual_size); + lua_settable(L, -3); + } +#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) + struct kinfo_proc kp; + size_t len = sizeof(kp); + int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, getpid()}; + + if (sysctl(mib, 4, &kp, &len, NULL, 0) == 0) { + lua_pushstring(L, "rss"); + lua_pushinteger(L, kp.ki_rssize * getpagesize()); + lua_settable(L, -3); + + lua_pushstring(L, "vsize"); + lua_pushinteger(L, kp.ki_size); + lua_settable(L, -3); + } +#endif + + return 1; +} + +static int lua_util_parse_content_type(lua_State *L) { return lua_parsers_parse_content_type(L); @@ -2510,7 +2644,7 @@ lua_util_readline(lua_State *L) if (lua_type(L, 1) == LUA_TSTRING) { prompt = lua_tostring(L, 1); } -#ifdef WITH_LUA_REPL + static Replxx *rx_instance = NULL; if (rx_instance == NULL) { @@ -2527,26 +2661,6 @@ lua_util_readline(lua_State *L) else { lua_pushnil(L); } -#else - size_t linecap = 0; - ssize_t linelen; - - fprintf(stdout, "%s ", prompt); - - linelen = getline(&input, &linecap, stdin); - - if (linelen > 0) { - if (input[linelen - 1] == '\n') { - linelen--; - } - - lua_pushlstring(L, input, linelen); - free(input); - } - else { - lua_pushnil(L); - } -#endif return 1; } @@ -3721,4 +3835,4 @@ lua_ev_base_add_timer(lua_State *L) ev_timer_start(ev_base, &cbdata->ev); return 0; -}
\ No newline at end of file +} |