about summary refs log tree commit diff stats
path: root/src/lua
diff options
context:
space:
mode:
Diffstat (limited to 'src/lua')
-rw-r--r-- src/lua/lua_common.c 52
-rw-r--r-- src/lua/lua_common.h 7
-rw-r--r-- src/lua/lua_config.c 92
-rw-r--r-- src/lua/lua_cryptobox.c 2
-rw-r--r-- src/lua/lua_http.c 123
-rw-r--r-- src/lua/lua_logger.c 102
-rw-r--r-- src/lua/lua_map.c 7
-rw-r--r-- src/lua/lua_mimepart.c 39
-rw-r--r-- src/lua/lua_parsers.c 18
-rw-r--r-- src/lua/lua_task.c 82
-rw-r--r-- src/lua/lua_util.c 164
11 files changed, 562 insertions, 126 deletions
diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c
index 3a0f1a06c..f36228680 100644
--- a/src/lua/lua_common.c
+++ b/src/lua/lua_common.c
@@ -2401,7 +2401,7 @@ rspamd_lua_try_load_redis(lua_State *L, const ucl_object_t *obj,
return FALSE;
}
-void rspamd_lua_push_full_word(lua_State *L, rspamd_stat_token_t *w)
+void rspamd_lua_push_full_word(lua_State *L, rspamd_word_t *w)
{
int fl_cnt;
@@ -2521,6 +2521,54 @@ int rspamd_lua_push_words(lua_State *L, GArray *words,
return 1;
}
+/**
+ * Converts a kvec of words into a Lua array and leaves it on top of the stack.
+ * Depending on `how`, every element is either the stemmed, normalized or
+ * original text of a word (words whose selected form is empty are skipped),
+ * or the full word representation built by rspamd_lua_push_full_word().
+ * @param L Lua state
+ * @param words vector of words; NULL or an unallocated vector gives an empty table
+ * @param how selects which form of each word is exported
+ * @return number of values pushed on the Lua stack (always 1)
+ */
+int rspamd_lua_push_words_kvec(lua_State *L, rspamd_words_t *words,
+							   enum rspamd_lua_words_type how)
+{
+	unsigned int pos, out_idx = 1;
+
+	if (words == NULL || words->a == NULL) {
+		lua_createtable(L, 0, 0);
+		return 1;
+	}
+
+	lua_createtable(L, kv_size(*words), 0);
+
+	for (pos = 0; pos < kv_size(*words); pos++) {
+		rspamd_word_t *word = &kv_A(*words, pos);
+		const char *text = NULL;
+		gsize text_len = 0;
+
+		if (how == RSPAMD_LUA_WORDS_FULL) {
+			/* Full words are always exported, regardless of their length */
+			rspamd_lua_push_full_word(L, word);
+			lua_rawseti(L, -2, out_idx++);
+			continue;
+		}
+
+		/* Pick the requested textual form; unknown modes export nothing */
+		if (how == RSPAMD_LUA_WORDS_STEM) {
+			text = word->stemmed.begin;
+			text_len = word->stemmed.len;
+		}
+		else if (how == RSPAMD_LUA_WORDS_NORM) {
+			text = word->normalized.begin;
+			text_len = word->normalized.len;
+		}
+		else if (how == RSPAMD_LUA_WORDS_RAW) {
+			text = word->original.begin;
+			text_len = word->original.len;
+		}
+
+		if (text_len > 0) {
+			lua_pushlstring(L, text, text_len);
+			lua_rawseti(L, -2, out_idx++);
+		}
+	}
+
+	return 1;
+}
+
char *
rspamd_lua_get_module_name(lua_State *L)
{
@@ -2658,4 +2706,4 @@ int rspamd_lua_geti(lua_State *L, int pos, int i)
return lua_type(L, -1);
}
-#endif \ No newline at end of file
+#endif
diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h
index 5819da8cb..d494f0923 100644
--- a/src/lua/lua_common.h
+++ b/src/lua/lua_common.h
@@ -539,7 +539,7 @@ enum lua_logger_escape_type {
* @return
*/
gsize lua_logger_out(lua_State *L, int pos, char *outbuf, gsize len,
- enum lua_logger_escape_type esc_type);
+ enum lua_logger_escape_type esc_type);
/**
* Safely checks userdata to match specified class
@@ -632,7 +632,7 @@ struct rspamd_stat_token_s;
* @param L
* @param word
*/
-void rspamd_lua_push_full_word(lua_State *L, struct rspamd_stat_token_s *word);
+void rspamd_lua_push_full_word(lua_State *L, rspamd_word_t *word);
enum rspamd_lua_words_type {
RSPAMD_LUA_WORDS_STEM = 0,
@@ -651,6 +651,9 @@ enum rspamd_lua_words_type {
int rspamd_lua_push_words(lua_State *L, GArray *words,
enum rspamd_lua_words_type how);
+int rspamd_lua_push_words_kvec(lua_State *L, rspamd_words_t *words,
+ enum rspamd_lua_words_type how);
+
/**
* Returns newly allocated name for caller module name
* @param L
diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c
index 07ed58ad5..7b3a156cd 100644
--- a/src/lua/lua_config.c
+++ b/src/lua/lua_config.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -24,6 +24,10 @@
#include "utlist.h"
#include <math.h>
+/* Forward declarations for custom tokenizer functions */
+gboolean rspamd_config_load_custom_tokenizers(struct rspamd_config *cfg, GError **err);
+void rspamd_config_unload_custom_tokenizers(struct rspamd_config *cfg);
+
/***
* This module is used to configure rspamd and is normally available as global
* variable named `rspamd_config`. Unlike other modules, it is not necessary to
@@ -118,7 +122,7 @@ local function foo(task)
end
*/
/***
-* @method rspamd_config:radix_from_ucl(obj)
+* @method rspamd_config:radix_from_ucl(obj, description)
* Creates new embedded map of IP/mask addresses from object.
* @param {ucl} obj object
* @return {map} radix tree object
@@ -862,6 +866,19 @@ LUA_FUNCTION_DEF(config, get_dns_max_requests);
*/
LUA_FUNCTION_DEF(config, get_dns_timeout);
+/***
+ * @method rspamd_config:load_custom_tokenizers()
+ * Loads custom tokenizers from configuration
+ * @return {boolean[,string]} true if successful; on failure returns false, followed by an error message when one is available
+ */
+LUA_FUNCTION_DEF(config, load_custom_tokenizers);
+
+/***
+ * @method rspamd_config:unload_custom_tokenizers()
+ * Unloads custom tokenizers and frees memory
+ */
+LUA_FUNCTION_DEF(config, unload_custom_tokenizers);
+
static const struct luaL_reg configlib_m[] = {
LUA_INTERFACE_DEF(config, get_module_opt),
LUA_INTERFACE_DEF(config, get_mempool),
@@ -937,6 +954,8 @@ static const struct luaL_reg configlib_m[] = {
LUA_INTERFACE_DEF(config, get_tld_path),
LUA_INTERFACE_DEF(config, get_dns_max_requests),
LUA_INTERFACE_DEF(config, get_dns_timeout),
+ LUA_INTERFACE_DEF(config, load_custom_tokenizers),
+ LUA_INTERFACE_DEF(config, unload_custom_tokenizers),
{"__tostring", rspamd_lua_class_tostring},
{"__newindex", lua_config_newindex},
{NULL, NULL}};
@@ -4485,11 +4504,14 @@ lua_config_init_subsystem(lua_State *L)
nparts = g_strv_length(parts);
for (i = 0; i < nparts; i++) {
- if (strcmp(parts[i], "filters") == 0) {
+ const char *str = parts[i];
+
+ /* TODO: total shit, rework some day */
+ if (strcmp(str, "filters") == 0) {
rspamd_lua_post_load_config(cfg);
rspamd_init_filters(cfg, false, false);
}
- else if (strcmp(parts[i], "langdet") == 0) {
+ else if (strcmp(str, "langdet") == 0) {
if (!cfg->lang_det) {
cfg->lang_det = rspamd_language_detector_init(cfg);
rspamd_mempool_add_destructor(cfg->cfg_pool,
@@ -4497,10 +4519,10 @@ lua_config_init_subsystem(lua_State *L)
cfg->lang_det);
}
}
- else if (strcmp(parts[i], "stat") == 0) {
+ else if (strcmp(str, "stat") == 0) {
rspamd_stat_init(cfg, NULL);
}
- else if (strcmp(parts[i], "dns") == 0) {
+ else if (strcmp(str, "dns") == 0) {
struct ev_loop *ev_base = lua_check_ev_base(L, 3);
if (ev_base) {
@@ -4514,11 +4536,25 @@ lua_config_init_subsystem(lua_State *L)
return luaL_error(L, "no event base specified");
}
}
- else if (strcmp(parts[i], "symcache") == 0) {
+ else if (strcmp(str, "symcache") == 0) {
rspamd_symcache_init(cfg->cache);
}
+		else if (strcmp(str, "tokenizers") == 0 || strcmp(str, "custom_tokenizers") == 0) {
+			GError *err = NULL;
+			if (!rspamd_config_load_custom_tokenizers(cfg, &err)) {
+				g_strfreev(parts);
+				if (err) {
+					/*
+					 * luaL_error()/lua_error() never return (they longjmp out
+					 * of this function), so the message is copied onto the
+					 * Lua stack and the GError is released *before* raising;
+					 * freeing it after the call would be dead code and leak.
+					 */
+					lua_pushfstring(L, "failed to load custom tokenizers: %s", err->message);
+					g_error_free(err);
+					return lua_error(L);
+				}
+				else {
+					return luaL_error(L, "failed to load custom tokenizers");
+				}
+			}
+		}
else {
- int ret = luaL_error(L, "invalid param: %s", parts[i]);
+ int ret = luaL_error(L, "invalid param: %s", str);
g_strfreev(parts);
return ret;
@@ -4772,3 +4808,43 @@ void lua_call_finish_script(struct rspamd_config_cfg_lua_script *sc,
lua_thread_call(thread, 1);
}
+
+/*
+ * Lua binding for rspamd_config:load_custom_tokenizers().
+ * Pushes the boolean result of the load; when loading fails and an error
+ * object is reported, pushes `false` followed by the error message instead.
+ * Raises a Lua error if the first argument is not a config object.
+ */
+static int
+lua_config_load_custom_tokenizers(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_config *cfg = lua_check_config(L, 1);
+	GError *load_err = NULL;
+	gboolean ok;
+
+	if (cfg == NULL) {
+		return luaL_error(L, "invalid arguments");
+	}
+
+	ok = rspamd_config_load_custom_tokenizers(cfg, &load_err);
+
+	if (ok || load_err == NULL) {
+		/* Either success or a failure without details: plain boolean */
+		lua_pushboolean(L, ok);
+		return 1;
+	}
+
+	/* Failure with details: false + message */
+	lua_pushboolean(L, FALSE);
+	lua_pushstring(L, load_err->message);
+	g_error_free(load_err);
+	return 2;
+}
+
+/*
+ * Lua binding for rspamd_config:unload_custom_tokenizers().
+ * Calls rspamd_config_unload_custom_tokenizers() for the config passed as
+ * the first argument and returns nothing to Lua; raises a Lua error if the
+ * argument is not a config object.
+ */
+static int
+lua_config_unload_custom_tokenizers(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_config *cfg = lua_check_config(L, 1);
+
+	if (cfg == NULL) {
+		return luaL_error(L, "invalid arguments");
+	}
+
+	rspamd_config_unload_custom_tokenizers(cfg);
+
+	return 0;
+}
diff --git a/src/lua/lua_cryptobox.c b/src/lua/lua_cryptobox.c
index 721d71256..2c2254920 100644
--- a/src/lua/lua_cryptobox.c
+++ b/src/lua/lua_cryptobox.c
@@ -404,7 +404,7 @@ lua_cryptobox_keypair_load(lua_State *L)
if (lua_type(L, 1) == LUA_TSTRING) {
buf = luaL_checklstring(L, 1, &len);
if (buf != NULL) {
- parser = ucl_parser_new(0);
+ parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS);
if (!ucl_parser_add_chunk(parser, buf, len)) {
msg_err("cannot open keypair from data: %s",
diff --git a/src/lua/lua_http.c b/src/lua/lua_http.c
index 7e9e7b1df..731b8b057 100644
--- a/src/lua/lua_http.c
+++ b/src/lua/lua_http.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -29,22 +29,123 @@
* This module hides all complexity: DNS resolving, sessions management, zero-copy
* text transfers and so on under the hood.
* @example
+-- Basic GET request with callback
local rspamd_http = require "rspamd_http"
local function symbol_callback(task)
local function http_callback(err_message, code, body, headers)
task:insert_result('SYMBOL', 1) -- task is available via closure
+
+ if err_message then
+ -- Handle error
+ return
+ end
+
+ -- Process response
+ if code == 200 then
+ -- Process body and headers
+ for name, value in pairs(headers) do
+ -- Headers are lowercase
+ end
+ end
end
- rspamd_http.request({
- task=task,
- url='http://example.com/data',
- body=task:get_content(),
- callback=http_callback,
- headers={Header='Value', OtherHeader='Value'},
- mime_type='text/plain',
- })
- end
+ rspamd_http.request({
+ task=task,
+ url='http://example.com/data',
+ body=task:get_content(),
+ callback=http_callback,
+ headers={Header='Value', OtherHeader='Value', DuplicatedHeader={'Multiple', 'Values'}},
+ mime_type='text/plain',
+ })
+end
+
+-- POST request with JSON body
+local function post_json_example(task)
+ local ucl = require "ucl"
+ local data = {
+ id = task:get_queue_id(),
+ sender = task:get_from()[1].addr
+ }
+
+ local json_data = ucl.to_json(data)
+
+ rspamd_http.request({
+ task = task,
+ url = "http://example.com/api/submit",
+ method = "POST",
+ body = json_data,
+ headers = {['Content-Type'] = 'application/json'},
+ callback = function(err, code, body, headers)
+ if not err and code == 200 then
+ -- Success
+ end
+ end
+ })
+end
+
+-- Synchronous HTTP request (using coroutines)
+local function sync_http_example(task)
+ -- No callback makes this a synchronous call
+ local err, response = rspamd_http.request({
+ task = task,
+ url = "http://example.com/api/data",
+ method = "GET",
+ timeout = 10.0
+ })
+
+ if not err then
+ -- Response is a table with code, content, and headers
+ if response.code == 200 then
+ -- Process response.content
+ return true
+ end
+ end
+ return false
+end
+
+-- Using authentication
+local function auth_example(task)
+ rspamd_http.request({
+ task = task,
+ url = "https://example.com/api/protected",
+ method = "GET",
+ user = "username",
+ password = "secret",
+ callback = function(err, code, body, headers)
+ -- Process authenticated response
+ end
+ })
+end
+
+-- Using HTTPS with SSL options
+local function https_example(task)
+ rspamd_http.request({
+ task = task,
+ url = "https://example.com/api/secure",
+ method = "GET",
+ no_ssl_verify = false, -- Verify SSL (default)
+ callback = function(err, code, body, headers)
+ -- Process secure response
+ end
+ })
+end
+
+-- Using keep-alive and gzip
+local function advanced_example(task)
+ rspamd_http.request({
+ task = task,
+ url = "http://example.com/api/data",
+ method = "POST",
+ body = task:get_content(),
+ gzip = true, -- Compress request body
+ keepalive = true, -- Use keep-alive connection
+ max_size = 1024 * 1024, -- Limit response to 1MB
+ callback = function(err, code, body, headers)
+ -- Process response
+ end
+ })
+end
*/
#define MAX_HEADERS_SIZE 8192
@@ -602,7 +703,7 @@ lua_http_push_headers(lua_State *L, struct rspamd_http_message *msg)
* @param {string} url specifies URL for a request in the standard URI form (e.g. 'http://example.com/path')
* @param {function} callback specifies callback function in format `function (err_message, code, body, headers)` that is called on HTTP request completion. if this parameter is missing, the function performs "pseudo-synchronous" call (see [Synchronous and Asynchronous API overview](/doc/developers/sync_async.html#API-example-http-module)
* @param {task} task if called from symbol handler it is generally a good idea to use the common task objects: event base, DNS resolver and events session
- * @param {table} headers optional headers in form `[name='value', name='value']`
+ * @param {table} headers optional headers in form `{name='value'}` or `{name={'value1', 'value2'}}` to duplicate a header with multiple values
* @param {string} mime_type MIME type of the HTTP content (for example, `text/html`)
* @param {string/text} body full body content, can be opaque `rspamd{text}` to avoid data copying
* @param {number} timeout floating point request timeout value in seconds (default is 5.0 seconds)
diff --git a/src/lua/lua_logger.c b/src/lua/lua_logger.c
index 8f2aa5be1..04ff81b6d 100644
--- a/src/lua/lua_logger.c
+++ b/src/lua/lua_logger.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -176,8 +176,8 @@ static const struct luaL_reg loggerlib_f[] = {
static gsize
lua_logger_out_type(lua_State *L, int pos, char *outbuf,
- gsize len, struct lua_logger_trace *trace,
- enum lua_logger_escape_type esc_type);
+ gsize len, struct lua_logger_trace *trace,
+ enum lua_logger_escape_type esc_type);
static void
lua_common_log_line(GLogLevelFlags level,
@@ -215,12 +215,12 @@ lua_common_log_line(GLogLevelFlags level,
}
rspamd_common_log_function(NULL,
- level,
- module,
- uid,
- p,
- "%s",
- msg);
+ level,
+ module,
+ uid,
+ p,
+ "%s",
+ msg);
}
/*** Logger interface ***/
@@ -280,19 +280,22 @@ lua_logger_char_safe(int t, unsigned int esc_type)
return true;
}
-/* Could return negative value in case of wrong argument number */
+#define LUA_MAX_ARGS 32
+/* Gracefully handles argument mismatches by substituting missing args and noting extra args */
static glong
lua_logger_log_format_str(lua_State *L, int offset, char *logbuf, gsize remain,
- const char *fmt,
- enum lua_logger_escape_type esc_type)
+ const char *fmt,
+ enum lua_logger_escape_type esc_type)
{
const char *c;
gsize r;
int digit;
-
char *d = logbuf;
unsigned int arg_num, cur_arg = 0, arg_max = lua_gettop(L) - offset;
+ gboolean args_used[LUA_MAX_ARGS];
+ unsigned int used_args_count = 0;
+ memset(args_used, 0, sizeof(args_used));
while (remain > 1 && *fmt) {
if (*fmt == '%') {
++fmt;
@@ -300,12 +303,13 @@ lua_logger_log_format_str(lua_State *L, int offset, char *logbuf, gsize remain,
if (*fmt == 's') {
++fmt;
++cur_arg;
- } else {
+ }
+ else {
arg_num = 0;
while ((digit = g_ascii_digit_value(*fmt)) >= 0) {
++fmt;
arg_num = arg_num * 10 + digit;
- if (arg_num >= 100) {
+ if (arg_num >= LUA_MAX_ARGS) {
/* Avoid ridiculously large numbers */
fmt = c;
break;
@@ -320,11 +324,19 @@ lua_logger_log_format_str(lua_State *L, int offset, char *logbuf, gsize remain,
if (fmt > c) {
if (cur_arg < 1 || cur_arg > arg_max) {
- *d = 0;
- return -((glong) cur_arg + 1); /* wrong argument number */
+ /* Missing argument - substitute placeholder */
+ r = rspamd_snprintf(d, remain, "<MISSING ARGUMENT>");
+ }
+ else {
+ /* Valid argument - output it */
+ r = lua_logger_out(L, offset + cur_arg, d, remain, esc_type);
+ /* Track which arguments are used */
+ if (cur_arg <= LUA_MAX_ARGS && !args_used[cur_arg - 1]) {
+ args_used[cur_arg - 1] = TRUE;
+ used_args_count++;
+ }
}
- r = lua_logger_out(L, offset + cur_arg, d, remain, esc_type);
g_assert(r < remain);
remain -= r;
d += r;
@@ -339,11 +351,21 @@ lua_logger_log_format_str(lua_State *L, int offset, char *logbuf, gsize remain,
--remain;
}
+ /* Check for extra arguments and append warning if any */
+ if (used_args_count > 0 && used_args_count < arg_max && remain > 1) {
+ unsigned int extra_args = arg_max - used_args_count;
+ r = rspamd_snprintf(d, remain, " <EXTRA %d ARGUMENTS>", (int) extra_args);
+ remain -= r;
+ d += r;
+ }
+
*d = 0;
return d - logbuf;
}
+#undef LUA_MAX_ARGS
+
static gsize
lua_logger_out_str(lua_State *L, int pos,
char *outbuf, gsize len,
@@ -486,12 +508,12 @@ lua_logger_out_userdata(lua_State *L, int pos, char *outbuf, gsize len)
return r;
}
-#define MOVE_BUF(d, remain, r) \
- (d) += (r); \
- (remain) -= (r); \
- if ((remain) <= 1) { \
- lua_settop(L, top); \
- goto table_oob; \
+#define MOVE_BUF(d, remain, r) \
+ (d) += (r); \
+ (remain) -= (r); \
+ if ((remain) <= 1) { \
+ lua_settop(L, top); \
+ goto table_oob; \
}
static gsize
@@ -545,9 +567,10 @@ lua_logger_out_table(lua_State *L, int pos, char *outbuf, gsize len,
if (first) {
first = FALSE;
- str = "[%d] = ";
- } else {
- str = ", [%d] = ";
+ str = "[%d] = ";
+ }
+ else {
+ str = ", [%d] = ";
}
r = rspamd_snprintf(d, remain, str, i);
MOVE_BUF(d, remain, r);
@@ -579,14 +602,12 @@ lua_logger_out_table(lua_State *L, int pos, char *outbuf, gsize len,
if (first) {
first = FALSE;
str = "[%2] = %1";
- } else {
+ }
+ else {
str = ", [%2] = %1";
}
r = lua_logger_log_format_str(L, top + 1, d, remain, str, esc_type);
- if (r < 0) {
- /* should not happen */
- goto table_oob;
- }
+ /* lua_logger_log_format_str now handles errors gracefully */
MOVE_BUF(d, remain, r);
/* Remove key */
@@ -606,9 +627,9 @@ table_oob:
static gsize
lua_logger_out_type(lua_State *L, int pos,
- char *outbuf, gsize len,
- struct lua_logger_trace *trace,
- enum lua_logger_escape_type esc_type)
+ char *outbuf, gsize len,
+ struct lua_logger_trace *trace,
+ enum lua_logger_escape_type esc_type)
{
if (len == 0) {
return 0;
@@ -640,8 +661,8 @@ lua_logger_out_type(lua_State *L, int pos,
}
gsize lua_logger_out(lua_State *L, int pos,
- char *outbuf, gsize len,
- enum lua_logger_escape_type esc_type)
+ char *outbuf, gsize len,
+ enum lua_logger_escape_type esc_type)
{
struct lua_logger_trace tr;
memset(&tr, 0, sizeof(tr));
@@ -747,11 +768,8 @@ lua_logger_log_format(lua_State *L, int fmt_pos, gboolean is_string,
return FALSE;
}
- glong ret = lua_logger_log_format_str(L, fmt_pos, logbuf, remain, fmt, is_string ? LUA_ESCAPE_UNPRINTABLE : LUA_ESCAPE_LOG);
- if (ret < 0) {
- msg_err("wrong argument number: %ud", -((int) ret + 1));
- return FALSE;
- }
+ /* lua_logger_log_format_str now handles argument mismatches gracefully */
+ lua_logger_log_format_str(L, fmt_pos, logbuf, remain, fmt, is_string ? LUA_ESCAPE_UNPRINTABLE : LUA_ESCAPE_LOG);
return TRUE;
}
diff --git a/src/lua/lua_map.c b/src/lua/lua_map.c
index 062613bd7..5f55ece06 100644
--- a/src/lua/lua_map.c
+++ b/src/lua/lua_map.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -319,6 +319,11 @@ int lua_config_radix_from_ucl(lua_State *L)
ucl_object_insert_key(fake_obj, ucl_object_fromstring("static"),
"url", 0, false);
+ if (lua_type(L, 3) == LUA_TSTRING) {
+ ucl_object_insert_key(fake_obj, ucl_object_fromstring(lua_tostring(L, 3)),
+ "description", 0, false);
+ }
+
if ((m = rspamd_map_add_from_ucl(cfg, fake_obj, "static radix map",
rspamd_radix_read,
rspamd_radix_fin,
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index 07dba9c93..982b10d90 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -901,7 +901,7 @@ lua_textpart_get_words_count(lua_State *L)
return 1;
}
- if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) {
+ if (IS_TEXT_PART_EMPTY(part) || !part->utf_words.a) {
lua_pushinteger(L, 0);
}
else {
@@ -943,7 +943,7 @@ lua_textpart_get_words(lua_State *L)
return luaL_error(L, "invalid arguments");
}
- if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) {
+ if (IS_TEXT_PART_EMPTY(part) || !part->utf_words.a) {
lua_createtable(L, 0, 0);
}
else {
@@ -957,7 +957,7 @@ lua_textpart_get_words(lua_State *L)
}
}
- return rspamd_lua_push_words(L, part->utf_words, how);
+ return rspamd_lua_push_words_kvec(L, &part->utf_words, how);
}
return 1;
@@ -976,7 +976,7 @@ lua_textpart_filter_words(lua_State *L)
return luaL_error(L, "invalid arguments");
}
- if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) {
+ if (IS_TEXT_PART_EMPTY(part) || !part->utf_words.a) {
lua_createtable(L, 0, 0);
}
else {
@@ -998,9 +998,8 @@ lua_textpart_filter_words(lua_State *L)
lua_createtable(L, 8, 0);
- for (i = 0, cnt = 1; i < part->utf_words->len; i++) {
- rspamd_stat_token_t *w = &g_array_index(part->utf_words,
- rspamd_stat_token_t, i);
+ for (i = 0, cnt = 1; i < kv_size(part->utf_words); i++) {
+ rspamd_word_t *w = &kv_A(part->utf_words, i);
switch (how) {
case RSPAMD_LUA_WORDS_STEM:
@@ -1194,13 +1193,13 @@ struct lua_shingle_filter_cbdata {
rspamd_mempool_t *pool;
};
-#define STORE_TOKEN(i, t) \
- do { \
- if ((i) < part->utf_words->len) { \
- word = &g_array_index(part->utf_words, rspamd_stat_token_t, (i)); \
- sd->t.begin = word->stemmed.begin; \
- sd->t.len = word->stemmed.len; \
- } \
+#define STORE_TOKEN(i, t) \
+ do { \
+ if ((i) < kv_size(part->utf_words)) { \
+ word = &kv_A(part->utf_words, (i)); \
+ sd->t.begin = word->stemmed.begin; \
+ sd->t.len = word->stemmed.len; \
+ } \
} while (0)
static uint64_t
@@ -1210,7 +1209,7 @@ lua_shingles_filter(uint64_t *input, gsize count,
uint64_t minimal = G_MAXUINT64;
gsize i, min_idx = 0;
struct lua_shingle_data *sd;
- rspamd_stat_token_t *word;
+ rspamd_word_t *word;
struct lua_shingle_filter_cbdata *cbd = (struct lua_shingle_filter_cbdata *) ud;
struct rspamd_mime_text_part *part;
@@ -1248,7 +1247,7 @@ lua_textpart_get_fuzzy_hashes(lua_State *L)
unsigned int i;
struct lua_shingle_data *sd;
rspamd_cryptobox_hash_state_t st;
- rspamd_stat_token_t *word;
+ rspamd_word_t *word;
struct lua_shingle_filter_cbdata cbd;
@@ -1256,7 +1255,7 @@ lua_textpart_get_fuzzy_hashes(lua_State *L)
return luaL_error(L, "invalid arguments");
}
- if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) {
+ if (IS_TEXT_PART_EMPTY(part) || !part->utf_words.a) {
lua_pushnil(L);
lua_pushnil(L);
}
@@ -1269,8 +1268,8 @@ lua_textpart_get_fuzzy_hashes(lua_State *L)
/* Calculate direct hash */
rspamd_cryptobox_hash_init(&st, key, rspamd_cryptobox_HASHKEYBYTES);
- for (i = 0; i < part->utf_words->len; i++) {
- word = &g_array_index(part->utf_words, rspamd_stat_token_t, i);
+ for (i = 0; i < kv_size(part->utf_words); i++) {
+ word = &kv_A(part->utf_words, i);
rspamd_cryptobox_hash_update(&st,
word->stemmed.begin, word->stemmed.len);
}
@@ -1283,7 +1282,7 @@ lua_textpart_get_fuzzy_hashes(lua_State *L)
cbd.pool = pool;
cbd.part = part;
- sgl = rspamd_shingles_from_text(part->utf_words, key,
+ sgl = rspamd_shingles_from_text(&part->utf_words, key,
pool, lua_shingles_filter, &cbd, RSPAMD_SHINGLES_MUMHASH);
if (sgl == NULL) {
diff --git a/src/lua/lua_parsers.c b/src/lua/lua_parsers.c
index f77b36952..39e1b0317 100644
--- a/src/lua/lua_parsers.c
+++ b/src/lua/lua_parsers.c
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2020 Vsevolod Stakhov
+/*
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -108,8 +108,8 @@ int lua_parsers_tokenize_text(lua_State *L)
struct rspamd_lua_text *t;
struct rspamd_process_exception *ex;
UText utxt = UTEXT_INITIALIZER;
- GArray *res;
- rspamd_stat_token_t *w;
+ rspamd_words_t *res;
+ rspamd_word_t *w;
if (lua_type(L, 1) == LUA_TSTRING) {
in = luaL_checklstring(L, 1, &len);
@@ -175,13 +175,15 @@ int lua_parsers_tokenize_text(lua_State *L)
lua_pushnil(L);
}
else {
- lua_createtable(L, res->len, 0);
+ lua_createtable(L, kv_size(*res), 0);
- for (i = 0; i < res->len; i++) {
- w = &g_array_index(res, rspamd_stat_token_t, i);
+ for (i = 0; i < kv_size(*res); i++) {
+ w = &kv_A(*res, i);
lua_pushlstring(L, w->original.begin, w->original.len);
lua_rawseti(L, -2, i + 1);
}
+ kv_destroy(*res);
+ g_free(res);
}
cur = exceptions;
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 97f9c496e..0b1473b61 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -6943,7 +6943,7 @@ lua_task_get_meta_words(lua_State *L)
return luaL_error(L, "invalid arguments");
}
- if (task->meta_words == NULL) {
+ if (!task->meta_words.a) {
lua_createtable(L, 0, 0);
}
else {
@@ -6967,7 +6967,7 @@ lua_task_get_meta_words(lua_State *L)
}
}
- return rspamd_lua_push_words(L, task->meta_words, how);
+ return rspamd_lua_push_words_kvec(L, &task->meta_words, how);
}
return 1;
@@ -7039,6 +7039,76 @@ lua_lookup_words_array(lua_State *L,
return nmatched;
}
+/*
+ * Matches the normalized form of every word in `words` against `map` and,
+ * for each match, calls the Lua function located at stack index `cbpos`
+ * with the full word as its only argument.
+ * Words with an empty normalized form are skipped. Errors raised by the
+ * callback are logged and do not interrupt the scan.
+ * Returns the number of matched words (0 for a NULL/unallocated vector).
+ */
+static unsigned int
+lua_lookup_words_kvec(lua_State *L,
+					  int cbpos,
+					  struct rspamd_task *task,
+					  struct rspamd_lua_map *map,
+					  rspamd_words_t *words)
+{
+	unsigned int idx, hits = 0;
+
+	if (words == NULL || words->a == NULL) {
+		return 0;
+	}
+
+	for (idx = 0; idx < kv_size(*words); idx++) {
+		rspamd_word_t *word = &kv_A(*words, idx);
+		const char *needle = word->normalized.begin;
+		gsize needle_len = word->normalized.len;
+		gboolean found = FALSE;
+		int traceback_pos;
+
+		if (needle_len == 0) {
+			continue;
+		}
+
+		switch (map->type) {
+		case RSPAMD_LUA_MAP_SET:
+		case RSPAMD_LUA_MAP_HASH:
+			/* The explicit length is passed, so no terminator is required */
+			found = rspamd_match_hash_map(map->data.hash, needle, needle_len) ? TRUE : FALSE;
+			break;
+		case RSPAMD_LUA_MAP_REGEXP:
+		case RSPAMD_LUA_MAP_REGEXP_MULTIPLE:
+			found = rspamd_match_regexp_map_single(map->data.re_map, needle,
+												   needle_len)
+						? TRUE
+						: FALSE;
+			break;
+		default:
+			g_assert_not_reached();
+			break;
+		}
+
+		if (!found) {
+			continue;
+		}
+
+		hits++;
+
+		/* Protected call with a traceback handler placed below the function */
+		lua_pushcfunction(L, &rspamd_lua_traceback);
+		traceback_pos = lua_gettop(L);
+		lua_pushvalue(L, cbpos); /* Function */
+		rspamd_lua_push_full_word(L, word);
+
+		if (lua_pcall(L, 1, 0, traceback_pos) != 0) {
+			msg_err_task("cannot call callback function for lookup words: %s",
+						 lua_tostring(L, -1));
+		}
+
+		/* Drop the handler and anything the call left behind */
+		lua_settop(L, traceback_pos - 1);
+	}
+
+	return hits;
+}
+
static int
lua_task_lookup_words(lua_State *L)
{
@@ -7062,13 +7132,13 @@ lua_task_lookup_words(lua_State *L)
PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, tp)
{
- if (tp->utf_words) {
- matches += lua_lookup_words_array(L, 3, task, map, tp->utf_words);
+ if (tp->utf_words.a) {
+ matches += lua_lookup_words_kvec(L, 3, task, map, &tp->utf_words);
}
}
- if (task->meta_words) {
- matches += lua_lookup_words_array(L, 3, task, map, task->meta_words);
+ if (task->meta_words.a) {
+ matches += lua_lookup_words_kvec(L, 3, task, map, &task->meta_words);
}
lua_pushinteger(L, matches);
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 9fe862757..f2e9b8fa9 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -23,12 +23,21 @@
#include "lua_parsers.h"
-#ifdef WITH_LUA_REPL
-#include "replxx.h"
-#endif
+#include "replxx.h"
#include <math.h>
#include <glob.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
+#include <sys/sysctl.h>
+#ifdef __FreeBSD__
+#include <sys/user.h>
+#endif
+#endif
+#ifdef __APPLE__
+#include <mach/mach.h>
+#endif
#include "unicode/uspoof.h"
#include "unicode/uscript.h"
@@ -629,6 +638,27 @@ LUA_FUNCTION_DEF(util, caseless_hash_fast);
LUA_FUNCTION_DEF(util, get_hostname);
/***
+ * @function util.get_uptime()
+ * Returns system uptime in seconds
+ * @return {number} uptime in seconds (0 if it cannot be determined)
+ */
+LUA_FUNCTION_DEF(util, get_uptime);
+
+/***
+ * @function util.get_pid()
+ * Returns current process PID
+ * @return {number} process ID
+ */
+LUA_FUNCTION_DEF(util, get_pid);
+
+/***
+ * @function util.get_memory_usage()
+ * Returns memory usage information for current process
+ * @return {table} memory usage info with 'rss' and 'vsize' fields in bytes (the table is empty if the information cannot be obtained)
+ */
+LUA_FUNCTION_DEF(util, get_memory_usage);
+
+/***
* @function util.parse_content_type(ct_string, mempool)
* Parses content-type string to a table:
* - `type`
@@ -730,6 +760,9 @@ static const struct luaL_reg utillib_f[] = {
LUA_INTERFACE_DEF(util, umask),
LUA_INTERFACE_DEF(util, isatty),
LUA_INTERFACE_DEF(util, get_hostname),
+ LUA_INTERFACE_DEF(util, get_uptime),
+ LUA_INTERFACE_DEF(util, get_pid),
+ LUA_INTERFACE_DEF(util, get_memory_usage),
LUA_INTERFACE_DEF(util, parse_content_type),
LUA_INTERFACE_DEF(util, mime_header_encode),
LUA_INTERFACE_DEF(util, pack),
@@ -2416,6 +2449,107 @@ lua_util_get_hostname(lua_State *L)
}
+/*
+ * Lua binding for util.get_uptime(): pushes the system uptime in seconds.
+ * Linux reads the first number of /proc/uptime; macOS and the BSDs subtract
+ * the kern.boottime sysctl value from the current time of day.
+ * Pushes 0 when the value cannot be obtained or the platform is not handled.
+ */
static int
+lua_util_get_uptime(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	double uptime_sec = 0.0;
+
+#ifdef __linux__
+	FILE *uptime_file = fopen("/proc/uptime", "r");
+
+	if (uptime_file != NULL) {
+		double parsed = 0.0;
+
+		/* Only accept the value if the number was actually parsed */
+		if (fscanf(uptime_file, "%lf", &parsed) == 1) {
+			uptime_sec = parsed;
+		}
+
+		fclose(uptime_file);
+	}
+#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
+	int mib[2] = {CTL_KERN, KERN_BOOTTIME};
+	struct timeval booted;
+	size_t booted_len = sizeof(booted);
+
+	if (sysctl(mib, 2, &booted, &booted_len, NULL, 0) == 0) {
+		struct timeval cur;
+
+		gettimeofday(&cur, NULL);
+		uptime_sec = (cur.tv_sec - booted.tv_sec) +
+					 (cur.tv_usec - booted.tv_usec) / 1000000.0;
+	}
+#endif
+
+	lua_pushnumber(L, uptime_sec);
+
+	return 1;
+}
+
+/* Lua binding for util.get_pid(): pushes the PID of the calling process */
+static int
+lua_util_get_pid(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	lua_Integer self_pid = getpid();
+
+	lua_pushinteger(L, self_pid);
+
+	return 1;
+}
+
+/*
+ * Lua binding for util.get_memory_usage().
+ * Always pushes exactly one table. When the information is available the
+ * table holds `rss` and `vsize` (both in bytes) for the current process:
+ *  - Linux: VmRSS/VmSize lines of /proc/self/status (kB converted to bytes);
+ *  - macOS: task_info(TASK_BASIC_INFO);
+ *  - FreeBSD: sysctl kern.proc.pid (struct kinfo_proc).
+ * The table is left empty if the source cannot be read or the platform is
+ * not handled.
+ */
+static int
+lua_util_get_memory_usage(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	lua_createtable(L, 0, 2);
+
+#ifdef __linux__
+	FILE *f = fopen("/proc/self/status", "r");
+	if (f) {
+		char line[256];
+		long rss = 0, vsize = 0;
+
+		while (fgets(line, sizeof(line), f)) {
+			if (sscanf(line, "VmRSS: %ld kB", &rss) == 1) {
+				rss *= 1024; /* Convert to bytes */
+			}
+			else if (sscanf(line, "VmSize: %ld kB", &vsize) == 1) {
+				vsize *= 1024; /* Convert to bytes */
+			}
+		}
+		fclose(f);
+
+		lua_pushinteger(L, rss);
+		lua_setfield(L, -2, "rss");
+
+		lua_pushinteger(L, vsize);
+		lua_setfield(L, -2, "vsize");
+	}
+#elif defined(__APPLE__)
+	struct task_basic_info info;
+	mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT;
+
+	if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t) &info, &count) == KERN_SUCCESS) {
+		lua_pushinteger(L, info.resident_size);
+		lua_setfield(L, -2, "rss");
+
+		lua_pushinteger(L, info.virtual_size);
+		lua_setfield(L, -2, "vsize");
+	}
+#elif defined(__FreeBSD__)
+	/*
+	 * FreeBSD only: the ki_* members and <sys/user.h> are FreeBSD specific.
+	 * OpenBSD and NetBSD declare struct kinfo_proc with different member
+	 * names and use a different sysctl MIB, so building this branch there
+	 * fails; those platforms return the empty table instead.
+	 */
+	struct kinfo_proc kp;
+	size_t len = sizeof(kp);
+	int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, getpid()};
+
+	if (sysctl(mib, 4, &kp, &len, NULL, 0) == 0) {
+		/* ki_rssize is scaled by the page size here to get bytes */
+		lua_pushinteger(L, kp.ki_rssize * getpagesize());
+		lua_setfield(L, -2, "rss");
+
+		lua_pushinteger(L, kp.ki_size);
+		lua_setfield(L, -2, "vsize");
+	}
+#endif
+
+	return 1;
+}
+
+static int
lua_util_parse_content_type(lua_State *L)
{
return lua_parsers_parse_content_type(L);
@@ -2510,7 +2644,7 @@ lua_util_readline(lua_State *L)
if (lua_type(L, 1) == LUA_TSTRING) {
prompt = lua_tostring(L, 1);
}
-#ifdef WITH_LUA_REPL
+
static Replxx *rx_instance = NULL;
if (rx_instance == NULL) {
@@ -2527,26 +2661,6 @@ lua_util_readline(lua_State *L)
else {
lua_pushnil(L);
}
-#else
- size_t linecap = 0;
- ssize_t linelen;
-
- fprintf(stdout, "%s ", prompt);
-
- linelen = getline(&input, &linecap, stdin);
-
- if (linelen > 0) {
- if (input[linelen - 1] == '\n') {
- linelen--;
- }
-
- lua_pushlstring(L, input, linelen);
- free(input);
- }
- else {
- lua_pushnil(L);
- }
-#endif
return 1;
}
@@ -3721,4 +3835,4 @@ lua_ev_base_add_timer(lua_State *L)
ev_timer_start(ev_base, &cbdata->ev);
return 0;
-} \ No newline at end of file
+}