diff options
-rw-r--r-- | conf/modules.d/contextal.conf | 21 | ||||
-rw-r--r-- | interface/index.html | 4 | ||||
-rw-r--r-- | interface/js/app/config.js | 32 | ||||
-rw-r--r-- | interface/js/app/rspamd.js | 2 | ||||
-rw-r--r-- | lualib/lua_maps.lua | 2 | ||||
-rw-r--r-- | lualib/lua_scanners/cloudmark.lua | 49 | ||||
-rw-r--r-- | lualib/lua_util.lua | 51 | ||||
-rw-r--r-- | rules/regexp/headers.lua | 8 | ||||
-rw-r--r-- | src/controller.c | 116 | ||||
-rw-r--r-- | src/libserver/fuzzy_backend/fuzzy_backend_redis.c | 4 | ||||
-rw-r--r-- | src/libserver/maps/map.c | 102 | ||||
-rw-r--r-- | src/libserver/maps/map_private.h | 36 | ||||
-rw-r--r-- | src/lua/lua_common.h | 3 | ||||
-rw-r--r-- | src/lua/lua_config.c | 4 | ||||
-rw-r--r-- | src/lua/lua_logger.c | 485 | ||||
-rw-r--r-- | src/lua/lua_map.c | 7 | ||||
-rw-r--r-- | src/plugins/lua/contextal.lua | 332 | ||||
-rw-r--r-- | src/plugins/lua/gpt.lua | 13 | ||||
-rw-r--r-- | src/plugins/lua/hfilter.lua | 13 | ||||
-rw-r--r-- | src/rspamadm/lua_repl.c | 8 |
20 files changed, 868 insertions, 424 deletions
diff --git a/conf/modules.d/contextal.conf b/conf/modules.d/contextal.conf new file mode 100644 index 000000000..da61b2cd8 --- /dev/null +++ b/conf/modules.d/contextal.conf @@ -0,0 +1,21 @@ +# Please don't modify this file as your changes might be overwritten with +# the next update. +# +# You can modify 'local.d/contextal.conf' to add and merge +# parameters defined inside this section +# +# You can modify 'override.d/contextal.conf' to strictly override all +# parameters defined inside this section +# +# See https://rspamd.com/doc/faq.html#what-are-the-locald-and-overrided-directories +# for details +# +# Module documentation can be found at https://rspamd.com/doc/modules/contextal.html + +contextal { + enabled = false; + + .include(try=true,priority=5) "${DBDIR}/dynamic/contextal.conf" + .include(try=true,priority=1,duplicate=merge) "$LOCAL_CONFDIR/local.d/contextal.conf" + .include(try=true,priority=10) "$LOCAL_CONFDIR/override.d/contextal.conf" +} diff --git a/interface/index.html b/interface/index.html index 9c42f4e7f..a759ac48f 100644 --- a/interface/index.html +++ b/interface/index.html @@ -328,7 +328,7 @@ <div class="card bg-light shadow my-3"> <div class="card-header text-secondary py-2 d-flex align-items-center"> <span class="icon me-3"><i class="fas fa-list"></i></span> - <span class="h6 fw-bolder my-auto">Lists</span> + <span class="h6 fw-bolder my-auto">Maps</span> <div class="input-group-sm ms-auto me-1"> Editor: <div id="btnGroupEditor" class="btn-group btn-group-xs ms-1"> @@ -342,6 +342,8 @@ </div> <div class="card-body p-0"> <table class="table table-sm table-hover mb-0" id="listMaps"> + <thead><tr><th>Flags</th><th>Type</th><th>URL</th><th>Description</th></tr></thead> + <tbody/> </table> </div> </div> diff --git a/interface/js/app/config.js b/interface/js/app/config.js index 037dabfdd..57e7ee37b 100644 --- a/interface/js/app/config.js +++ b/interface/js/app/config.js @@ -115,24 +115,28 @@ define(["jquery", "app/common"], common.query("maps", { success: function (json) { const [{data}] = json; - $listmaps.empty(); - $("#modalBody").empty(); - const $tbody = $("<tbody>"); + const $tbody = $listmaps.children("tbody").empty(); $.each(data, (i, item) => { - let $td = '<td><span class="badge text-bg-secondary">Read</span></td>'; - if (!(item.editable === false || common.read_only)) { - $td = $($td).append(' <span class="badge text-bg-success">Write</span>'); - } - const $tr = $("<tr>").append($td); + const $td = $("<td>"); + + const badges = [ + {text: "Not loaded", cls: "text-bg-warning", cond: !item.loaded}, + {text: "Cached", cls: "text-bg-info", cond: item.cached}, + {text: "Writable", cls: "text-bg-success", cond: !(item.editable === false || common.read_only)} + ]; + badges.forEach((b) => { + if (b.cond) $td.append($(`<span class="badge me-1 ${b.cls}">${b.text}</span>`)); + }); - const $span = $('<span class="map-link" data-bs-toggle="modal" data-bs-target="#modalDialog">' + - item.uri + "</span>").data("item", item); + const $tr = $("<tr>").append($td).append($("<td>" + item.type + "</td>")); + if (!item.loaded) $tr.addClass("table-light opacity-50"); + + const $span = $('<span class="map-link">' + item.uri + "</span>").data("item", item); $span.wrap("<td>").parent().appendTo($tr); $("<td>" + item.description + "</td>").appendTo($tr); $tr.appendTo($tbody); }); - $tbody.appendTo($listmaps); $listmaps.closest(".card").show(); }, server: common.getServer() @@ -157,7 +161,7 @@ define(["jquery", "app/common"], let mode = "advanced"; // Modal form for maps - $(document).on("click", "[data-bs-toggle=\"modal\"]", function () { + $(document).on("click", ".map-link", function () { const item = $(this).data("item"); common.query("getmap", { headers: { @@ -167,6 +171,7 @@ define(["jquery", "app/common"], // Highlighting a large amount of text is unresponsive mode = (new Blob([data[0].data]).size > 5120) ? "basic" : $("input[name=editorMode]:checked").val(); + $("#modalBody").empty(); $("<" + editor[mode].elt + ' id="editor" class="' + editor[mode].class + '" data-id="' + item.map + '"></' + editor[mode].elt + ">").appendTo("#modalBody"); @@ -198,10 +203,9 @@ define(["jquery", "app/common"], errorMessage: "Cannot receive maps data", server: common.getServer() }); - return false; }); $("#modalDialog").on("hidden.bs.modal", () => { - if (editor[mode].codejar) { + if (editor[mode].codejar && jar && typeof jar.destroy === "function") { jar.destroy(); $(".codejar-wrap").remove(); } else { diff --git a/interface/js/app/rspamd.js b/interface/js/app/rspamd.js index 61b7cf155..6d047d6f6 100644 --- a/interface/js/app/rspamd.js +++ b/interface/js/app/rspamd.js @@ -52,7 +52,7 @@ define(["jquery", "app/common", "stickytabs", "visibility", function cleanCredentials() { sessionStorage.clear(); $("#statWidgets").empty(); - $("#listMaps").empty(); + $("#listMaps").children("tbody").empty(); $("#modalBody").empty(); } diff --git a/lualib/lua_maps.lua b/lualib/lua_maps.lua index d4ba30306..c45b51b97 100644 --- a/lualib/lua_maps.lua +++ b/lualib/lua_maps.lua @@ -354,7 +354,7 @@ local function rspamd_map_add_from_ucl(opt, mtype, description, callback) if string.find(opt[1], '^%d') then -- List of numeric stuff (hope it's ipnets definitions) - local map = rspamd_config:radix_from_ucl(opt) + local map = rspamd_config:radix_from_ucl(opt, description) if map then ret.__data = map diff --git a/lualib/lua_scanners/cloudmark.lua b/lualib/lua_scanners/cloudmark.lua index 26a3bf9c4..12a60abf1 100644 --- a/lualib/lua_scanners/cloudmark.lua +++ b/lualib/lua_scanners/cloudmark.lua @@ -173,53 +173,6 @@ local function cloudmark_config(opts) return nil end --- Converts a key-value map to the table representing multipart body, with the following values: --- `data`: data of the part --- `filename`: optional filename --- `content-type`: content type of the element (optional) --- `content-transfer-encoding`: optional CTE header -local function table_to_multipart_body(tbl, boundary) - local seen_data = false - local out = {} - - for k, v in pairs(tbl) do - if v.data then - seen_data = true - table.insert(out, string.format('--%s\r\n', boundary)) - if v.filename then - table.insert(out, - string.format('Content-Disposition: form-data; name="%s"; filename="%s"\r\n', - k, v.filename)) - else - table.insert(out, - string.format('Content-Disposition: form-data; name="%s"\r\n', k)) - end - if v['content-type'] then - table.insert(out, - string.format('Content-Type: %s\r\n', v['content-type'])) - else - table.insert(out, 'Content-Type: text/plain\r\n') - end - if v['content-transfer-encoding'] then - table.insert(out, - string.format('Content-Transfer-Encoding: %s\r\n', - v['content-transfer-encoding'])) - else - table.insert(out, 'Content-Transfer-Encoding: binary\r\n') - end - table.insert(out, '\r\n') - table.insert(out, v.data) - table.insert(out, '\r\n') - end - end - - if seen_data then - table.insert(out, string.format('--%s--\r\n', boundary)) - end - - return out -end - local function get_specific_symbol(scores_symbols, score) local selected local sel_thr = -1 @@ -359,7 +312,7 @@ local function cloudmark_check(task, content, digest, rule, maybe_part) local request_data = { task = task, url = url, - body = table_to_multipart_body(request, static_boundary), + body = lua_util.table_to_multipart_body(request, static_boundary), headers = { ['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary) }, diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua index 62b38c87e..636212b1f 100644 --- a/lualib/lua_util.lua +++ b/lualib/lua_util.lua @@ -1805,4 +1805,55 @@ exports.symbols_priorities = { low = 0, } +---[[[ +-- @function lua_util.table_to_multipart_body(tbl, boundary) +-- Converts a key-value map to the table representing multipart body, with the following values: +-- `data`: data of the part +-- `filename`: optional filename +-- `content-type`: content type of the element (optional) +-- `content-transfer-encoding`: optional CTE header +local function table_to_multipart_body(tbl, boundary) + local seen_data = false + local out = {} + + for k, v in pairs(tbl) do + if v.data then + seen_data = true + table.insert(out, string.format('--%s\r\n', boundary)) + if v.filename then + table.insert(out, + string.format('Content-Disposition: form-data; name="%s"; filename="%s"\r\n', + k, v.filename)) + else + table.insert(out, + string.format('Content-Disposition: form-data; name="%s"\r\n', k)) + end + if v['content-type'] then + table.insert(out, + string.format('Content-Type: %s\r\n', v['content-type'])) + else + table.insert(out, 'Content-Type: text/plain\r\n') + end + if v['content-transfer-encoding'] then + table.insert(out, + string.format('Content-Transfer-Encoding: %s\r\n', + v['content-transfer-encoding'])) + else + table.insert(out, 'Content-Transfer-Encoding: binary\r\n') + end + table.insert(out, '\r\n') + table.insert(out, v.data) + table.insert(out, '\r\n') + end + end + + if seen_data then + table.insert(out, string.format('--%s--\r\n', boundary)) + end + + return out +end + +exports.table_to_multipart_body = table_to_multipart_body + return exports diff --git a/rules/regexp/headers.lua b/rules/regexp/headers.lua index 3aba02d86..1b5f55db1 100644 --- a/rules/regexp/headers.lua +++ b/rules/regexp/headers.lua @@ -69,6 +69,14 @@ if rspamd_config:is_mime_utf8() then end end +reconf['R_HTTP_URL_IN_FROM'] = { + re = [[From=/(^|"|'|\s)[hH][tT][tT][pP][sS]?(:|=3A)\/\/\S/H]], + score = 5.0, + mime_only = true, + description = 'HTTP URL preceded by the start of a line, quote, or whitespace, with normal or URL-encoded colons in From header', + group = 'headers' +} + -- Detects that there is no space in From header (e.g. Some Name<some@host>) reconf['R_NO_SPACE_IN_FROM'] = { re = 'From=/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/X', diff --git a/src/controller.c b/src/controller.c index 386448f93..22423e999 100644 --- a/src/controller.c +++ b/src/controller.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -979,12 +979,6 @@ rspamd_controller_handle_maps(struct rspamd_http_connection_entry *conn_ent, if (bk->protocol == MAP_PROTO_FILE) { editable = rspamd_controller_can_edit_map(bk); - - if (!editable && access(bk->uri, R_OK) == -1) { - /* Skip unreadable and non-existing maps */ - continue; - } - obj = ucl_object_typed_new(UCL_OBJECT); ucl_object_insert_key(obj, ucl_object_fromint(bk->id), "map", 0, false); @@ -994,8 +988,34 @@ rspamd_controller_handle_maps(struct rspamd_http_connection_entry *conn_ent, } ucl_object_insert_key(obj, ucl_object_fromstring(bk->uri), "uri", 0, false); + ucl_object_insert_key(obj, ucl_object_fromstring("file"), + "type", 0, false); ucl_object_insert_key(obj, ucl_object_frombool(editable), "editable", 0, false); + ucl_object_insert_key(obj, ucl_object_frombool(bk->shared->loaded), + "loaded", 0, false); + ucl_object_insert_key(obj, ucl_object_frombool(bk->shared->cached), + "cached", 0, false); + ucl_array_append(top, obj); + } + else { + obj = ucl_object_typed_new(UCL_OBJECT); + ucl_object_insert_key(obj, ucl_object_fromint(bk->id), + "map", 0, false); + if (map->description) { + ucl_object_insert_key(obj, ucl_object_fromstring(map->description), + "description", 0, false); + } + ucl_object_insert_key(obj, ucl_object_fromstring(bk->uri), + "uri", 0, false); + ucl_object_insert_key(obj, ucl_object_fromstring(rspamd_map_fetch_protocol_name(bk->protocol)), + "type", 0, false); + ucl_object_insert_key(obj, ucl_object_frombool(false), + "editable", 0, false); + ucl_object_insert_key(obj, ucl_object_frombool(bk->shared->loaded), + "loaded", 0, false); + ucl_object_insert_key(obj, ucl_object_frombool(bk->shared->cached), + "cached", 0, false); ucl_array_append(top, obj); } } @@ -1008,6 +1028,21 @@ rspamd_controller_handle_maps(struct rspamd_http_connection_entry *conn_ent, return 0; } +gboolean +rspamd_controller_map_traverse_callback(gconstpointer key, gconstpointer value, gsize _hits, gpointer ud) +{ + rspamd_fstring_t **target = (rspamd_fstring_t **) ud; + + *target = rspamd_fstring_append(*target, key, strlen(key)); + + if (value) { + *target = rspamd_fstring_append(*target, " ", 1); + *target = rspamd_fstring_append(*target, value, strlen(value)); + } + *target = rspamd_fstring_append(*target, "\n", 1); + + return TRUE; +} /* * Get map command handler: * request: /getmap @@ -1020,7 +1055,7 @@ rspamd_controller_handle_get_map(struct rspamd_http_connection_entry *conn_ent, { struct rspamd_controller_session *session = conn_ent->ud; GList *cur; - struct rspamd_map *map; + struct rspamd_map *map = NULL; struct rspamd_map_backend *bk = NULL; const rspamd_ftok_t *idstr; struct stat st; @@ -1054,7 +1089,7 @@ rspamd_controller_handle_get_map(struct rspamd_http_connection_entry *conn_ent, PTR_ARRAY_FOREACH(map->backends, i, bk) { - if (bk->id == id && bk->protocol == MAP_PROTO_FILE) { + if (bk->id == id) { found = TRUE; break; } @@ -1069,32 +1104,53 @@ rspamd_controller_handle_get_map(struct rspamd_http_connection_entry *conn_ent, return 0; } - if (stat(bk->uri, &st) == -1 || (fd = open(bk->uri, O_RDONLY)) == -1) { + if (bk->protocol == MAP_PROTO_FILE) { + if (stat(bk->uri, &st) == -1 || (fd = open(bk->uri, O_RDONLY)) == -1) { + reply = rspamd_http_new_message(HTTP_RESPONSE); + reply->date = time(NULL); + reply->code = 200; + } + else { + + reply = rspamd_http_new_message(HTTP_RESPONSE); + reply->date = time(NULL); + reply->code = 200; + + if (st.st_size > 0) { + if (!rspamd_http_message_set_body_from_fd(reply, fd)) { + close(fd); + rspamd_http_message_unref(reply); + msg_err_session("cannot read map %s: %s", bk->uri, strerror(errno)); + rspamd_controller_send_error(conn_ent, 500, "Map read error"); + return 0; + } + } + else { + rspamd_fstring_t *empty_body = rspamd_fstring_new_init("", 0); + rspamd_http_message_set_body_from_fstring_steal(reply, empty_body); + } + + close(fd); + } + } + else if (bk->protocol == MAP_PROTO_STATIC) { + /* We can just traverse map and form reply */ reply = rspamd_http_new_message(HTTP_RESPONSE); - reply->date = time(NULL); reply->code = 200; + rspamd_fstring_t *map_body = rspamd_fstring_new(); + rspamd_map_traverse(bk->map, rspamd_controller_map_traverse_callback, &map_body, FALSE); + rspamd_http_message_set_body_from_fstring_steal(reply, map_body); } - else { - + else if (bk->shared->loaded) { reply = rspamd_http_new_message(HTTP_RESPONSE); - reply->date = time(NULL); reply->code = 200; - - if (st.st_size > 0) { - if (!rspamd_http_message_set_body_from_fd(reply, fd)) { - close(fd); - rspamd_http_message_unref(reply); - msg_err_session("cannot read map %s: %s", bk->uri, strerror(errno)); - rspamd_controller_send_error(conn_ent, 500, "Map read error"); - return 0; - } - } - else { - rspamd_fstring_t *empty_body = rspamd_fstring_new_init("", 0); - rspamd_http_message_set_body_from_fstring_steal(reply, empty_body); - } - - close(fd); + rspamd_fstring_t *map_body = rspamd_fstring_new(); + rspamd_map_traverse(bk->map, rspamd_controller_map_traverse_callback, &map_body, FALSE); + rspamd_http_message_set_body_from_fstring_steal(reply, map_body); + } + else { + reply = rspamd_http_new_message(HTTP_RESPONSE); + reply->code = 404; } rspamd_http_connection_reset(conn_ent->conn); diff --git a/src/libserver/fuzzy_backend/fuzzy_backend_redis.c b/src/libserver/fuzzy_backend/fuzzy_backend_redis.c index 27c663070..f150d48be 100644 --- a/src/libserver/fuzzy_backend/fuzzy_backend_redis.c +++ b/src/libserver/fuzzy_backend/fuzzy_backend_redis.c @@ -116,11 +116,9 @@ rspamd_redis_get_servers(struct rspamd_fuzzy_backend_redis *ctx, res = *((struct upstream_list **) lua_touserdata(L, -1)); } else { - struct lua_logger_trace tr; char outbuf[8192]; - memset(&tr, 0, sizeof(tr)); - lua_logger_out_type(L, -2, outbuf, sizeof(outbuf) - 1, &tr, + lua_logger_out(L, -2, outbuf, sizeof(outbuf), LUA_ESCAPE_UNPRINTABLE); msg_err("cannot get %s upstreams for Redis fuzzy storage %s; table content: %s", diff --git a/src/libserver/maps/map.c b/src/libserver/maps/map.c index 97130ad7c..51390f24b 100644 --- a/src/libserver/maps/map.c +++ b/src/libserver/maps/map.c @@ -339,6 +339,7 @@ http_map_finish(struct rspamd_http_connection *conn, cbd->periodic->cur_backend = 0; /* Reset cache, old cached data will be cleaned on timeout */ g_atomic_int_set(&data->cache->available, 0); + g_atomic_int_set(&bk->shared->loaded, 0); data->cur_cache_cbd = NULL; rspamd_map_process_periodic(cbd->periodic); @@ -424,6 +425,8 @@ http_map_finish(struct rspamd_http_connection *conn, * We know that a map is in the locked state */ g_atomic_int_set(&data->cache->available, 1); + g_atomic_int_set(&bk->shared->loaded, 1); + g_atomic_int_set(&bk->shared->cached, 0); /* Store cached data */ rspamd_strlcpy(data->cache->shmem_name, cbd->shmem_data->shm_name, sizeof(data->cache->shmem_name)); @@ -919,6 +922,8 @@ read_map_file(struct rspamd_map *map, struct file_map_data *data, map->read_callback(NULL, 0, &periodic->cbdata, TRUE); } + g_atomic_int_set(&bk->shared->loaded, 1); + return TRUE; } @@ -1003,6 +1008,7 @@ read_map_static(struct rspamd_map *map, struct static_map_data *data, } data->processed = TRUE; + g_atomic_int_set(&bk->shared->loaded, 1); return TRUE; } @@ -1011,6 +1017,7 @@ static void rspamd_map_periodic_dtor(struct map_periodic_cbdata *periodic) { struct rspamd_map *map; + struct rspamd_map_backend *bk; map = periodic->map; msg_debug_map("periodic dtor %p; need_modify=%d", periodic, periodic->need_modify); @@ -1028,8 +1035,11 @@ rspamd_map_periodic_dtor(struct map_periodic_cbdata *periodic) } if (periodic->locked) { - g_atomic_int_set(periodic->map->locked, 0); - msg_debug_map("unlocked map %s", periodic->map->name); + if (periodic->cur_backend < map->backends->len) { + bk = (struct rspamd_map_backend *) g_ptr_array_index(map->backends, periodic->cur_backend); + g_atomic_int_set(&bk->shared->locked, 0); + msg_debug_map("unlocked map %s", map->name); + } if (periodic->map->wrk->state == rspamd_worker_state_running) { rspamd_map_schedule_periodic(periodic->map, @@ -1438,6 +1448,9 @@ rspamd_map_read_cached(struct rspamd_map *map, struct rspamd_map_backend *bk, map->read_callback(in, len, &periodic->cbdata, TRUE); } + g_atomic_int_set(&bk->shared->loaded, 1); + g_atomic_int_set(&bk->shared->cached, 1); + munmap(in, mmap_len); return TRUE; @@ -1727,6 +1740,8 @@ rspamd_map_read_http_cached_file(struct rspamd_map *map, struct tm tm; char ncheck_buf[32], lm_buf[32]; + g_atomic_int_set(&bk->shared->loaded, 1); + g_atomic_int_set(&bk->shared->cached, 1); rspamd_localtime(map->next_check, &tm); strftime(ncheck_buf, sizeof(ncheck_buf) - 1, "%Y-%m-%d %H:%M:%S", &tm); rspamd_localtime(htdata->last_modified, &tm); @@ -2027,8 +2042,20 @@ rspamd_map_process_periodic(struct map_periodic_cbdata *cbd) map = cbd->map; map->scheduled_check = NULL; + /* For each backend we need to check for modifications */ + if (cbd->cur_backend >= cbd->map->backends->len) { + /* Last backend */ + msg_debug_map("finished map: %d of %d", cbd->cur_backend, + cbd->map->backends->len); + MAP_RELEASE(cbd, "periodic"); + + return; + } + + bk = g_ptr_array_index(map->backends, cbd->cur_backend); + if (!map->file_only && !cbd->locked) { - if (!g_atomic_int_compare_and_exchange(cbd->map->locked, + if (!g_atomic_int_compare_and_exchange(&bk->shared->locked, 0, 1)) { msg_debug_map( "don't try to reread map %s as it is locked by other process, " @@ -2040,7 +2067,7 @@ rspamd_map_process_periodic(struct map_periodic_cbdata *cbd) return; } else { - msg_debug_map("locked map %s", cbd->map->name); + msg_debug_map("locked map %s", map->name); cbd->locked = TRUE; } } @@ -2050,7 +2077,7 @@ rspamd_map_process_periodic(struct map_periodic_cbdata *cbd) rspamd_map_schedule_periodic(cbd->map, RSPAMD_MAP_SCHEDULE_ERROR); if (cbd->locked) { - g_atomic_int_set(cbd->map->locked, 0); + g_atomic_int_set(&bk->shared->locked, 0); cbd->locked = FALSE; } @@ -2064,19 +2091,7 @@ rspamd_map_process_periodic(struct map_periodic_cbdata *cbd) return; } - /* For each backend we need to check for modifications */ - if (cbd->cur_backend >= cbd->map->backends->len) { - /* Last backend */ - msg_debug_map("finished map: %d of %d", cbd->cur_backend, - cbd->map->backends->len); - MAP_RELEASE(cbd, "periodic"); - - return; - } - if (cbd->map->wrk && cbd->map->wrk->state == rspamd_worker_state_running) { - bk = g_ptr_array_index(cbd->map->backends, cbd->cur_backend); - g_assert(bk != NULL); if (cbd->need_modify) { /* Load data from the next backend */ @@ -2781,9 +2796,8 @@ rspamd_map_parse_backend(struct rspamd_config *cfg, const char *map_line) bk->data.sd = sdata; } - bk->id = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_T1HA, - bk->uri, strlen(bk->uri), - 0xdeadbabe); + bk->shared = rspamd_mempool_alloc0_shared(cfg->cfg_pool, + sizeof(struct rspamd_map_shared_backend_data)); return bk; @@ -2815,6 +2829,13 @@ rspamd_map_calculate_hash(struct rspamd_map *map) rspamd_cryptobox_hash_init(&st, NULL, 0); + if (map->name) { + rspamd_cryptobox_hash_update(&st, map->name, strlen(map->name)); + } + if (map->description) { + rspamd_cryptobox_hash_update(&st, map->description, strlen(map->description)); + } + for (i = 0; i < map->backends->len; i++) { bk = g_ptr_array_index(map->backends, i); rspamd_cryptobox_hash_update(&st, bk->uri, strlen(bk->uri)); @@ -2823,6 +2844,26 @@ rspamd_map_calculate_hash(struct rspamd_map *map) rspamd_cryptobox_hash_final(&st, cksum); cksum_encoded = rspamd_encode_base32(cksum, sizeof(cksum), RSPAMD_BASE32_DEFAULT); rspamd_strlcpy(map->tag, cksum_encoded, sizeof(map->tag)); + + for (i = 0; i < map->backends->len; i++) { + bk = g_ptr_array_index(map->backends, i); + + /* Also update each backend */ + rspamd_cryptobox_fast_hash_state_t hst; + rspamd_cryptobox_fast_hash_init(&hst, 0); + rspamd_cryptobox_fast_hash_update(&hst, bk->uri, strlen(bk->uri)); + rspamd_cryptobox_fast_hash_update(&hst, map->tag, sizeof(map->tag)); + + if (bk->protocol == MAP_PROTO_STATIC) { + /* Static maps content is pre-defined */ + rspamd_cryptobox_fast_hash_update(&hst, bk->data.sd->data, + bk->data.sd->len); + } + + /* We use only 52 bits to be compatible with other numbers representation */ + bk->id = rspamd_cryptobox_fast_hash_final(&hst) & ~(0xFFFULL << 52); + } + g_free(cksum_encoded); } @@ -2888,8 +2929,6 @@ rspamd_map_add(struct rspamd_config *cfg, map->user_data = user_data; map->cfg = cfg; map->id = rspamd_random_uint64_fast(); - map->locked = - rspamd_mempool_alloc0_shared(cfg->cfg_pool, sizeof(int)); map->backends = g_ptr_array_sized_new(1); map->wrk = worker; rspamd_mempool_add_destructor(cfg->cfg_pool, rspamd_ptr_array_free_hard, @@ -2988,8 +3027,6 @@ rspamd_map_add_from_ucl(struct rspamd_config *cfg, map->user_data = user_data; map->cfg = cfg; map->id = rspamd_random_uint64_fast(); - map->locked = - rspamd_mempool_alloc0_shared(cfg->cfg_pool, sizeof(int)); map->backends = g_ptr_array_new(); map->wrk = worker; map->no_file_read = (flags & RSPAMD_MAP_FILE_NO_READ); @@ -3108,7 +3145,7 @@ rspamd_map_add_from_ucl(struct rspamd_config *cfg, goto err; } - gboolean all_local = TRUE; + gboolean all_local = TRUE, all_loaded = TRUE; PTR_ARRAY_FOREACH(map->backends, i, bk) { @@ -3127,9 +3164,8 @@ rspamd_map_add_from_ucl(struct rspamd_config *cfg, map_data = g_string_sized_new(32); if (rspamd_map_add_static_string(cfg, elt, map_data)) { - bk->data.sd->data = map_data->str; bk->data.sd->len = map_data->len; - g_string_free(map_data, FALSE); + bk->data.sd->data = (unsigned char *) g_string_free(map_data, FALSE); } else { g_string_free(map_data, TRUE); @@ -3152,13 +3188,16 @@ rspamd_map_add_from_ucl(struct rspamd_config *cfg, } ucl_object_iterate_free(it); - bk->data.sd->data = map_data->str; bk->data.sd->len = map_data->len; - g_string_free(map_data, FALSE); + bk->data.sd->data = (unsigned char *) g_string_free(map_data, FALSE); } } else if (bk->protocol != MAP_PROTO_FILE) { all_local = FALSE; + all_loaded = FALSE; /* Will be loaded later */ + } + else { + all_loaded = FALSE; /* Will be loaded later (even for files) */ } } @@ -3167,6 +3206,11 @@ rspamd_map_add_from_ucl(struct rspamd_config *cfg, cfg->map_file_watch_multiplier); } + if (all_loaded) { + /* Static map */ + g_atomic_int_set(&bk->shared->loaded, 1); + } + rspamd_map_calculate_hash(map); msg_debug_map("added map from ucl"); diff --git a/src/libserver/maps/map_private.h b/src/libserver/maps/map_private.h index d0b22fe36..66949f926 100644 --- a/src/libserver/maps/map_private.h +++ b/src/libserver/maps/map_private.h @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -54,6 +54,23 @@ enum fetch_proto { MAP_PROTO_STATIC }; +static const char * +rspamd_map_fetch_protocol_name(enum fetch_proto proto) +{ + switch (proto) { + case MAP_PROTO_FILE: + return "file"; + case MAP_PROTO_HTTP: + return "http"; + case MAP_PROTO_HTTPS: + return "https"; + case MAP_PROTO_STATIC: + return "static"; + default: + return "unknown"; + } +} + /** * Data specific to file maps */ @@ -76,7 +93,7 @@ struct rspamd_http_map_cached_cbdata { time_t last_checked; }; -struct rspamd_map_cachepoint { +struct rspamd_http_map_cache { int available; gsize len; time_t last_modified; @@ -88,7 +105,7 @@ struct rspamd_map_cachepoint { */ struct http_map_data { /* Shared cache data */ - struct rspamd_map_cachepoint *cache; + struct rspamd_http_map_cache *cache; /* Non-shared for cache owner, used to cleanup cache */ struct rspamd_http_map_cached_cbdata *cur_cache_cbd; char *userinfo; @@ -117,14 +134,23 @@ union rspamd_map_backend_data { struct rspamd_map; +/* + * Shared between workers + */ +struct rspamd_map_shared_backend_data { + int locked; + int loaded; + int cached; +}; struct rspamd_map_backend { enum fetch_proto protocol; gboolean is_signed; gboolean is_compressed; gboolean is_fallback; + struct rspamd_map_shared_backend_data *shared; struct rspamd_map *map; struct ev_loop *event_loop; - uint32_t id; + uint64_t id; struct rspamd_cryptobox_pubkey *trusted_pubkey; union rspamd_map_backend_data data; char *uri; @@ -167,8 +193,6 @@ struct rspamd_map { bool static_only; /* No need to check */ bool no_file_read; /* Do not read files */ bool seen; /* This map has already been watched or pre-loaded */ - /* Shared lock for temporary disabling of map reading (e.g. when this map is written by UI) */ - int *locked; char tag[MEMPOOL_UID_LEN]; }; diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h index a29444394..5819da8cb 100644 --- a/src/lua/lua_common.h +++ b/src/lua/lua_common.h @@ -538,8 +538,7 @@ enum lua_logger_escape_type { * @param len * @return */ -gsize lua_logger_out_type(lua_State *L, int pos, char *outbuf, - gsize len, struct lua_logger_trace *trace, +gsize lua_logger_out(lua_State *L, int pos, char *outbuf, gsize len, enum lua_logger_escape_type esc_type); /** diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c index 07ed58ad5..f52eae44f 100644 --- a/src/lua/lua_config.c +++ b/src/lua/lua_config.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -118,7 +118,7 @@ local function foo(task) end */ /*** -* @method rspamd_config:radix_from_ucl(obj) +* @method rspamd_config:radix_from_ucl(obj, description) * Creates new embedded map of IP/mask addresses from object. * @param {ucl} obj object * @return {map} radix tree object diff --git a/src/lua/lua_logger.c b/src/lua/lua_logger.c index 004b82e72..8f2aa5be1 100644 --- a/src/lua/lua_logger.c +++ b/src/lua/lua_logger.c @@ -174,6 +174,11 @@ static const struct luaL_reg loggerlib_f[] = { {"__tostring", rspamd_lua_class_tostring}, {NULL, NULL}}; +static gsize +lua_logger_out_type(lua_State *L, int pos, char *outbuf, + gsize len, struct lua_logger_trace *trace, + enum lua_logger_escape_type esc_type); + static void lua_common_log_line(GLogLevelFlags level, lua_State *L, @@ -203,23 +208,19 @@ lua_common_log_line(GLogLevelFlags level, d.currentline); } - rspamd_common_log_function(NULL, - level, - module, - uid, - func_buf, - "%s", - msg); + p = func_buf; } else { - rspamd_common_log_function(NULL, - level, - module, - uid, - G_STRFUNC, - "%s", - msg); + p = (char *) G_STRFUNC; } + + rspamd_common_log_function(NULL, + level, + module, + uid, + p, + "%s", + msg); } /*** Logger interface ***/ @@ -279,105 +280,139 @@ lua_logger_char_safe(int t, unsigned int esc_type) return true; } +/* Could return negative value in case of wrong argument number */ +static glong +lua_logger_log_format_str(lua_State *L, int offset, char *logbuf, gsize remain, + const char *fmt, + enum lua_logger_escape_type esc_type) +{ + const char *c; + gsize r; + int digit; + + char *d = logbuf; + unsigned int arg_num, cur_arg = 0, arg_max = lua_gettop(L) - offset; + + while (remain > 1 && *fmt) { + if (*fmt == '%') { + ++fmt; + c = fmt; + if (*fmt == 's') { + ++fmt; + ++cur_arg; + } else { + arg_num = 0; + while ((digit = g_ascii_digit_value(*fmt)) >= 0) { + ++fmt; + arg_num = arg_num * 10 + digit; + if (arg_num >= 100) { + /* Avoid ridiculously large numbers */ + fmt = c; + break; + } + } + + if (fmt > c) { + /* Update the current argument */ + cur_arg = arg_num; + } + } + + if (fmt > c) { + if (cur_arg < 1 || cur_arg > arg_max) { + *d = 0; + return -((glong) cur_arg + 1); /* wrong argument number */ + } + + r = lua_logger_out(L, offset + cur_arg, d, remain, esc_type); + g_assert(r < remain); + remain -= r; + d += r; + continue; + } + + /* Copy % */ + --fmt; + } + + *d++ = *fmt++; + --remain; + } + + *d = 0; + + return d - logbuf; +} + static gsize lua_logger_out_str(lua_State *L, int pos, char *outbuf, gsize len, - struct lua_logger_trace *trace, enum lua_logger_escape_type esc_type) { - gsize slen, flen; - const char *str = lua_tolstring(L, pos, &slen); static const char hexdigests[16] = "0123456789abcdef"; - gsize r = 0, s; + gsize slen; + const unsigned char *str = lua_tolstring(L, pos, &slen); + unsigned char c; + char *out = outbuf; if (str) { - gboolean normal = TRUE; - flen = MIN(slen, len - 1); - - for (r = 0; r < flen; r++) { - if (!lua_logger_char_safe(str[r], esc_type)) { - normal = FALSE; - break; + while (slen > 0 && len > 1) { + c = *str++; + if (lua_logger_char_safe(c, esc_type)) { + *out++ = c; } - } - - if (normal) { - r = rspamd_strlcpy(outbuf, str, flen + 1); - } - else { - /* Need to escape non-printed characters */ - r = 0; - s = 0; - - while (slen > 0 && len > 1) { - if (!lua_logger_char_safe(str[s], esc_type)) { - if (len >= 3) { - outbuf[r++] = '\\'; - outbuf[r++] = hexdigests[((str[s] >> 4) & 0xF)]; - outbuf[r++] = hexdigests[((str[s]) & 0xF)]; - - len -= 2; - } - else { - outbuf[r++] = '?'; - } - } - else { - outbuf[r++] = str[s]; - } - - s++; - slen--; - len--; + else if (len > 3) { + /* Need to escape non-printed characters */ + *out++ = '\\'; + *out++ = hexdigests[c >> 4]; + *out++ = hexdigests[c & 0xF]; + len -= 2; } - - outbuf[r] = '\0'; + else { + *out++ = '?'; + } + --slen; + --len; } } + *out = 0; - return r; + return out - outbuf; } static gsize -lua_logger_out_num(lua_State *L, int pos, char *outbuf, gsize len, - struct lua_logger_trace *trace) +lua_logger_out_num(lua_State *L, int pos, char *outbuf, gsize len) { double num = lua_tonumber(L, pos); - glong inum; - gsize r = 0; + glong inum = (glong) num; - if ((double) (glong) num == num) { - inum = num; - r = rspamd_snprintf(outbuf, len + 1, "%l", inum); - } - else { - r = rspamd_snprintf(outbuf, len + 1, "%f", num); + if ((double) inum == num) { + return rspamd_snprintf(outbuf, len, "%l", inum); } - return r; + return rspamd_snprintf(outbuf, len, "%f", num); } static gsize -lua_logger_out_boolean(lua_State *L, int pos, char *outbuf, gsize len, - struct lua_logger_trace *trace) +lua_logger_out_boolean(lua_State *L, int pos, char *outbuf, gsize len) { gboolean val = lua_toboolean(L, pos); - gsize r = 0; - - r = rspamd_strlcpy(outbuf, val ? "true" : "false", len + 1); - return r; + return rspamd_snprintf(outbuf, len, val ? "true" : "false"); } static gsize -lua_logger_out_userdata(lua_State *L, int pos, char *outbuf, gsize len, - struct lua_logger_trace *trace) +lua_logger_out_userdata(lua_State *L, int pos, char *outbuf, gsize len) { - int r = 0, top; + gsize r = 0; + int top; const char *str = NULL; gboolean converted_to_str = FALSE; top = lua_gettop(L); + if (pos < 0) { + pos += top + 1; /* Convert to absolute */ + } if (!lua_getmetatable(L, pos)) { return 0; @@ -396,26 +431,17 @@ lua_logger_out_userdata(lua_State *L, int pos, char *outbuf, gsize len, if (lua_isfunction(L, -1)) { lua_pushvalue(L, pos); - if (lua_pcall(L, 1, 1, 0) != 0) { - lua_settop(L, top); - - return 0; - } - - str = lua_tostring(L, -1); - - if (str) { - r = rspamd_snprintf(outbuf, len, "%s", str); + if (lua_pcall(L, 1, 1, 0) == 0) { + str = lua_tostring(L, -1); + if (str) { + r = rspamd_snprintf(outbuf, len, "%s", str); + } } - - lua_settop(L, top); - - return r; } } lua_settop(L, top); - return 0; + return r; } lua_pushstring(L, "__tostring"); @@ -460,12 +486,12 @@ lua_logger_out_userdata(lua_State *L, int pos, char *outbuf, gsize len, return r; } -#define MOVE_BUF(d, remain, r) \ +#define MOVE_BUF(d, remain, r) \ (d) += (r); \ (remain) -= (r); \ - if ((remain) == 0) { \ - lua_settop(L, old_top); \ - break; \ + if ((remain) <= 1) { \ + lua_settop(L, top); \ + goto table_oob; \ } static gsize @@ -473,169 +499,154 @@ lua_logger_out_table(lua_State *L, int pos, char *outbuf, gsize len, struct lua_logger_trace *trace, enum lua_logger_escape_type esc_type) { - char *d = outbuf; - gsize remain = len, r; + char *d = outbuf, *str; + gsize remain = len; + glong r; gboolean first = TRUE; gconstpointer self = NULL; - int i, tpos, last_seq = -1, old_top; + int i, last_seq = 0, top; + double num; + glong inum; - if (!lua_istable(L, pos) || remain == 0) { - return 0; - } + /* Type and length checks are done in logger_out_type() */ - old_top = lua_gettop(L); self = lua_topointer(L, pos); /* Check if we have seen this pointer */ for (i = 0; i < TRACE_POINTS; i++) { if (trace->traces[i] == self) { - r = rspamd_snprintf(d, remain + 1, "ref(%p)", self); - - d += r; - - return (d - outbuf); + if ((trace->cur_level + TRACE_POINTS - 1) % TRACE_POINTS == i) { + return rspamd_snprintf(d, remain, "__self"); + } + return rspamd_snprintf(d, remain, "ref(%p)", self); } } trace->traces[trace->cur_level % TRACE_POINTS] = self; + ++trace->cur_level; - lua_pushvalue(L, pos); - r = rspamd_snprintf(d, remain + 1, "{"); - remain -= r; - d += r; + top = lua_gettop(L); + if (pos < 0) { + pos += top + 1; /* Convert to absolute */ + } + + r = rspamd_snprintf(d, remain, "{"); + MOVE_BUF(d, remain, r); /* Get numeric keys (ipairs) */ for (i = 1;; i++) { - lua_rawgeti(L, -1, i); + lua_rawgeti(L, pos, i); if (lua_isnil(L, -1)) { lua_pop(L, 1); + last_seq = i; break; } - last_seq = i; - - if (!first) { - r = rspamd_snprintf(d, remain + 1, ", "); - MOVE_BUF(d, remain, r); + if (first) { + first = FALSE; + str = "[%d] = "; + } else { + str = ", [%d] = "; } - - r = rspamd_snprintf(d, remain + 1, "[%d] = ", i); + r = rspamd_snprintf(d, remain, str, i); MOVE_BUF(d, remain, r); - tpos = lua_gettop(L); - if (lua_topointer(L, tpos) == self) { - r = rspamd_snprintf(d, remain + 1, "__self"); - } - else { - r = lua_logger_out_type(L, tpos, d, remain, trace, esc_type); - } + r = lua_logger_out_type(L, -1, d, remain, trace, esc_type); MOVE_BUF(d, remain, r); - first = FALSE; lua_pop(L, 1); } /* Get string keys (pairs) */ - for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) { + for (lua_pushnil(L); lua_next(L, pos); lua_pop(L, 1)) { /* 'key' is at index -2 and 'value' is at index -1 */ - if (lua_type(L, -2) == LUA_TNUMBER) { - if (last_seq > 0) { - lua_pushvalue(L, -2); - if (lua_tonumber(L, -1) <= last_seq + 1) { - lua_pop(L, 1); + /* Preserve key */ + lua_pushvalue(L, -2); + if (last_seq > 0) { + if (lua_type(L, -1) == LUA_TNUMBER) { + num = lua_tonumber(L, -1); /* no conversion here */ + inum = (glong) num; + if ((double) inum == num && inum > 0 && inum < last_seq) { /* Already seen */ + lua_pop(L, 1); continue; } - - lua_pop(L, 1); } } - if (!first) { - r = rspamd_snprintf(d, remain + 1, ", "); - MOVE_BUF(d, remain, r); + if (first) { + first = FALSE; + str = "[%2] = %1"; + } else { + str = ", [%2] = %1"; } - - /* Preserve key */ - lua_pushvalue(L, -2); - r = rspamd_snprintf(d, remain + 1, "[%s] = ", - lua_tostring(L, -1)); - lua_pop(L, 1); /* Remove key */ - MOVE_BUF(d, remain, r); - tpos = lua_gettop(L); - - if (lua_topointer(L, tpos) == self) { - r = rspamd_snprintf(d, remain + 1, "__self"); - } - else { - r = lua_logger_out_type(L, tpos, d, remain, trace, esc_type); + r = lua_logger_log_format_str(L, top + 1, d, remain, str, esc_type); + if (r < 0) { + /* should not happen */ + goto table_oob; } MOVE_BUF(d, remain, r); - first = FALSE; + /* Remove key */ + lua_pop(L, 1); } - lua_settop(L, old_top); - - r = rspamd_snprintf(d, remain + 1, "}"); + r = rspamd_snprintf(d, remain, "}"); d += r; +table_oob: + --trace->cur_level; + return (d - outbuf); } #undef MOVE_BUF -gsize lua_logger_out_type(lua_State *L, int pos, +static gsize +lua_logger_out_type(lua_State *L, int pos, char *outbuf, gsize len, struct lua_logger_trace *trace, enum lua_logger_escape_type esc_type) { - int type; - gsize r = 0; - if (len == 0) { return 0; } - type = lua_type(L, pos); - trace->cur_level++; + int type = lua_type(L, pos); switch (type) { case LUA_TNUMBER: - r = lua_logger_out_num(L, pos, outbuf, len, trace); - break; + return lua_logger_out_num(L, pos, outbuf, len); case LUA_TBOOLEAN: - r = lua_logger_out_boolean(L, pos, outbuf, len, trace); - break; + return lua_logger_out_boolean(L, pos, outbuf, len); case LUA_TTABLE: - r = lua_logger_out_table(L, pos, outbuf, len, trace, esc_type); - break; + return lua_logger_out_table(L, pos, outbuf, len, trace, esc_type); case LUA_TUSERDATA: - r = lua_logger_out_userdata(L, pos, outbuf, len, trace); - break; + return lua_logger_out_userdata(L, pos, outbuf, len); case LUA_TFUNCTION: - r = rspamd_snprintf(outbuf, len + 1, "function"); - break; + return rspamd_snprintf(outbuf, len, "function"); case LUA_TLIGHTUSERDATA: - r = rspamd_snprintf(outbuf, len + 1, "0x%p", lua_topointer(L, pos)); - break; + return rspamd_snprintf(outbuf, len, "0x%p", lua_topointer(L, pos)); case LUA_TNIL: - r = rspamd_snprintf(outbuf, len + 1, "nil"); - break; + return rspamd_snprintf(outbuf, len, "nil"); case LUA_TNONE: - r = rspamd_snprintf(outbuf, len + 1, "no value"); - break; - default: - /* Try to push everything as string using tostring magic */ - r = lua_logger_out_str(L, pos, outbuf, len, trace, esc_type); - break; + return rspamd_snprintf(outbuf, len, "no value"); } - trace->cur_level--; + /* Try to push everything as string using tostring magic */ + return lua_logger_out_str(L, pos, outbuf, len, esc_type); +} - return r; +gsize lua_logger_out(lua_State *L, int pos, + char *outbuf, gsize len, + enum lua_logger_escape_type esc_type) +{ + struct lua_logger_trace tr; + memset(&tr, 0, sizeof(tr)); + + return lua_logger_out_type(L, pos, outbuf, len, &tr, esc_type); } static const char * @@ -731,72 +742,16 @@ static gboolean lua_logger_log_format(lua_State *L, int fmt_pos, gboolean is_string, char *logbuf, gsize remain) { - char *d; - const char *s, *c; - gsize r; - unsigned int arg_num, arg_max, cur_arg; - struct lua_logger_trace tr; - int digit; - - s = lua_tostring(L, fmt_pos); - if (s == NULL) { + const char *fmt = lua_tostring(L, fmt_pos); + if (fmt == NULL) { return FALSE; } - arg_max = (unsigned int) lua_gettop(L) - fmt_pos; - d = logbuf; - cur_arg = 0; - - while (remain > 0 && *s) { - if (*s == '%') { - ++s; - c = s; - if (*s == 's') { - ++s; - ++cur_arg; - } else { - arg_num = 0; - while ((digit = g_ascii_digit_value(*s)) >= 0) { - ++s; - arg_num = arg_num * 10 + digit; - if (arg_num >= 100) { - /* Avoid ridiculously large numbers */ - s = c; - break; - } - } - - if (s > c) { - /* Update the current argument */ - cur_arg = arg_num; - } - } - - if (s > c) { - if (cur_arg < 1 || cur_arg > arg_max) { - msg_err("wrong argument number: %ud", cur_arg); - return FALSE; - } - - memset(&tr, 0, sizeof(tr)); - r = lua_logger_out_type(L, fmt_pos + cur_arg, d, remain, &tr, - is_string ? LUA_ESCAPE_UNPRINTABLE : LUA_ESCAPE_LOG); - g_assert(r <= remain); - remain -= r; - d += r; - continue; - } - - /* Copy % */ - --s; - } - - *d++ = *s++; - --remain; + glong ret = lua_logger_log_format_str(L, fmt_pos, logbuf, remain, fmt, is_string ? LUA_ESCAPE_UNPRINTABLE : LUA_ESCAPE_LOG); + if (ret < 0) { + msg_err("wrong argument number: %ud", -((int) ret + 1)); + return FALSE; } - - *d = '\0'; - return TRUE; } @@ -808,15 +763,10 @@ lua_logger_do_log(lua_State *L, { char logbuf[RSPAMD_LOGBUF_SIZE - 128]; const char *uid = NULL; - int fmt_pos = start_pos; int ret; - GError *err = NULL; - if (lua_type(L, start_pos) == LUA_TSTRING) { - fmt_pos = start_pos; - } - else if (lua_type(L, start_pos) == LUA_TUSERDATA) { - fmt_pos = start_pos + 1; + if (lua_type(L, start_pos) == LUA_TUSERDATA) { + GError *err = NULL; uid = lua_logger_get_id(L, start_pos, &err); @@ -830,15 +780,17 @@ lua_logger_do_log(lua_State *L, return ret; } + + ++start_pos; } - else { + + if (lua_type(L, start_pos) != LUA_TSTRING) { /* Bad argument type */ return luaL_error(L, "bad format string type: %s", lua_typename(L, lua_type(L, start_pos))); } - ret = lua_logger_log_format(L, fmt_pos, is_string, - logbuf, sizeof(logbuf) - 1); + ret = lua_logger_log_format(L, start_pos, is_string, logbuf, sizeof(logbuf)); if (ret) { if (is_string) { @@ -849,12 +801,9 @@ lua_logger_do_log(lua_State *L, lua_common_log_line(level, L, logbuf, uid, "lua", 1); } } - else { - if (is_string) { - lua_pushnil(L); - - return 1; - } + else if (is_string) { + lua_pushnil(L); + return 1; } return 0; @@ -917,11 +866,11 @@ lua_logger_logx(lua_State *L) if (uid && modname) { if (lua_type(L, 4) == LUA_TSTRING) { - ret = lua_logger_log_format(L, 4, FALSE, logbuf, sizeof(logbuf) - 1); + ret = lua_logger_log_format(L, 4, FALSE, logbuf, sizeof(logbuf)); } else if (lua_type(L, 4) == LUA_TNUMBER) { stack_pos = lua_tonumber(L, 4); - ret = lua_logger_log_format(L, 5, FALSE, logbuf, sizeof(logbuf) - 1); + ret = lua_logger_log_format(L, 5, FALSE, logbuf, sizeof(logbuf)); } else { return luaL_error(L, "invalid argument on pos 4"); @@ -959,11 +908,11 @@ lua_logger_debugm(lua_State *L) if (uid && module) { if (lua_type(L, 3) == LUA_TSTRING) { - ret = lua_logger_log_format(L, 3, FALSE, logbuf, sizeof(logbuf) - 1); + ret = lua_logger_log_format(L, 3, FALSE, logbuf, sizeof(logbuf)); } else if (lua_type(L, 3) == LUA_TNUMBER) { stack_pos = lua_tonumber(L, 3); - ret = lua_logger_log_format(L, 4, FALSE, logbuf, sizeof(logbuf) - 1); + ret = lua_logger_log_format(L, 4, FALSE, logbuf, sizeof(logbuf)); } else { return luaL_error(L, "invalid argument on pos 3"); diff --git a/src/lua/lua_map.c b/src/lua/lua_map.c index 062613bd7..5f55ece06 100644 --- a/src/lua/lua_map.c +++ b/src/lua/lua_map.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -319,6 +319,11 @@ int lua_config_radix_from_ucl(lua_State *L) ucl_object_insert_key(fake_obj, ucl_object_fromstring("static"), "url", 0, false); + if (lua_type(L, 3) == LUA_TSTRING) { + ucl_object_insert_key(fake_obj, ucl_object_fromstring(lua_tostring(L, 3)), + "description", 0, false); + } + if ((m = rspamd_map_add_from_ucl(cfg, fake_obj, "static radix map", rspamd_radix_read, rspamd_radix_fin, diff --git a/src/plugins/lua/contextal.lua b/src/plugins/lua/contextal.lua new file mode 100644 index 000000000..f6202781a --- /dev/null +++ b/src/plugins/lua/contextal.lua @@ -0,0 +1,332 @@ +--[[ +Copyright (c) 2025, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +local E = {} +local N = 'contextal' + +if confighelp then + return +end + +local opts = rspamd_config:get_all_opt(N) +if not opts then + return +end + +local lua_redis = require "lua_redis" +local lua_util = require "lua_util" +local redis_cache = require "lua_cache" +local rspamd_http = require "rspamd_http" +local rspamd_logger = require "rspamd_logger" +local rspamd_util = require "rspamd_util" +local ts = require("tableshape").types +local ucl = require "ucl" + +local cache_context, redis_params + +local contextal_actions = { + ['ALERT'] = true, + ['ALLOW'] = true, + ['BLOCK'] = true, + ['QUARANTINE'] = true, + ['SPAM'] = true, +} + +local config_schema = lua_redis.enrich_schema { + action_symbol_prefix = ts.string:is_optional(), + base_url = ts.string:is_optional(), + cache_prefix = ts.string:is_optional(), + cache_timeout = ts.number:is_optional(), + cache_ttl = ts.number:is_optional(), + custom_actions = ts.array_of(ts.string):is_optional(), + defer_if_no_result = ts.boolean:is_optional(), + defer_message = ts.string:is_optional(), + enabled = ts.boolean:is_optional(), + http_timeout = ts.number:is_optional(), + request_ttl = ts.number:is_optional(), + submission_symbol = ts.string:is_optional(), +} + +local settings = { + action_symbol_prefix = 'CONTEXTAL_ACTION', + base_url = 'http://localhost:8080', + cache_prefix = 'CXAL', + cache_timeout = 5, + cache_ttl = 3600, + custom_actions = {}, + defer_if_no_result = false, + defer_message = 'Awaiting deep scan - try again later', + http_timeout = 2, + request_ttl = 4, + submission_symbol = 'CONTEXTAL_SUBMIT', +} + +local static_boundary = rspamd_util.random_hex(32) +local use_request_ttl = true + +local function maybe_defer(task, obj) + if settings.defer_if_no_result and not ((obj or E)[1] or E).actions then + task:set_pre_result('soft reject', settings.defer_message) + end +end + +local function process_actions(task, obj, is_cached) + for _, match in ipairs((obj[1] or E).actions or E) do + local act = match.action + local scenario = match.scenario + if not (act and scenario) then + rspamd_logger.err(task, 'bad result: %s', match) + elseif contextal_actions[act] then + task:insert_result(settings.action_symbol_prefix .. '_' .. act, 1.0, scenario) + else + rspamd_logger.err(task, 'unknown action: %s', act) + end + end + + if not cache_context or is_cached then + maybe_defer(task, obj) + return + end + + local cache_obj + if (obj[1] or E).actions then + cache_obj = {[1] = {["actions"] = obj[1].actions}} + else + local work_id = task:get_mempool():get_variable('contextal_work_id', 'string') + if work_id then + cache_obj = {[1] = {["work_id"] = work_id}} + else + rspamd_logger.err(task, 'no work id found in mempool') + return + end + end + + redis_cache.cache_set(task, + task:get_digest(), + cache_obj, + cache_context) + + maybe_defer(task, obj) +end + +local function process_cached(task, obj) + if (obj[1] or E).actions then + task:disable_symbol(settings.action_symbol_prefix) + return process_actions(task, obj, true) + elseif (obj[1] or E).work_id then + task:get_mempool():set_variable('contextal_work_id', obj[1].work_id) + else + rspamd_logger.err(task, 'bad result (cached): %s', obj) + end +end + +local function action_cb(task) + local work_id = task:get_mempool():get_variable('contextal_work_id', 'string') + if not work_id then + rspamd_logger.err(task, 'no work id found in mempool') + return + end + + local function http_callback(err, code, body, hdrs) + if err then + rspamd_logger.err(task, 'http error: %s', err) + maybe_defer(task) + return + end + if code ~= 200 then + rspamd_logger.err(task, 'bad http code: %s', code) + maybe_defer(task) + return + end + local parser = ucl.parser() + local _, parse_err = parser:parse_string(body) + if parse_err then + rspamd_logger.err(task, 'cannot parse JSON: %s', err) + maybe_defer(task) + return + end + local obj = parser:get_object() + return process_actions(task, obj, false) + end + + rspamd_http.request({ + task = task, + url = settings.actions_url .. work_id, + callback = http_callback, + timeout = settings.http_timeout, + gzip = settings.gzip, + keepalive = settings.keepalive, + no_ssl_verify = settings.no_ssl_verify, + }) +end + +local function submit(task) + + local function http_callback(err, code, body, hdrs) + if err then + rspamd_logger.err(task, 'http error: %s', err) + maybe_defer(task) + return + end + if code ~= 201 then + rspamd_logger.err(task, 'bad http code: %s', code) + maybe_defer(task) + return + end + local parser = ucl.parser() + local _, parse_err = parser:parse_string(body) + if parse_err then + rspamd_logger.err(task, 'cannot parse JSON: %s', err) + maybe_defer(task) + return + end + local obj = parser:get_object() + local work_id = obj.work_id + if work_id then + task:get_mempool():set_variable('contextal_work_id', work_id) + end + task:insert_result(settings.submission_symbol, 1.0, + string.format('work_id=%s', work_id or 'nil')) + task:add_timer(settings.request_ttl, action_cb) + end + + local req = { + object_data = {['data'] = task:get_content()}, + } + if settings.request_ttl then + req.ttl = {['data'] = tostring(settings.request_ttl)} + end + if settings.max_recursion then + req.maxrec = {['data'] = tostring(settings.max_recursion)} + end + rspamd_http.request({ + task = task, + url = settings.submit_url, + body = lua_util.table_to_multipart_body(req, static_boundary), + callback = http_callback, + headers = { + ['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary) + }, + timeout = settings.http_timeout, + gzip = settings.gzip, + keepalive = settings.keepalive, + no_ssl_verify = settings.no_ssl_verify, + }) +end + +local function cache_hit(task, err, data) + if err then + rspamd_logger.err(task, 'error getting cache: %s', err) + else + process_cached(task, data) + end +end + +local function submit_cb(task) + if cache_context then + redis_cache.cache_get(task, + task:get_digest(), + cache_context, + settings.cache_timeout, + submit, + cache_hit + ) + else + submit(task) + end +end + +local function set_url_path(base, path) + local slash = base:sub(#base) == '/' and '' or '/' + return base .. slash .. path +end + +settings = lua_util.override_defaults(settings, opts) + +local res, err = config_schema:transform(settings) +if not res then + rspamd_logger.warnx(rspamd_config, 'plugin %s is misconfigured: %s', N, err) + local err_msg = string.format("schema error: %s", res) + lua_util.config_utils.push_config_error(N, err_msg) + lua_util.disable_module(N, "failed", err_msg) + return +end + +for _, k in ipairs(settings.custom_actions) do + contextal_actions[k] = true +end + +if not settings.base_url then + if not (settings.submit_url and settings.actions_url) then + rspamd_logger.err(rspamd_config, 'no URL configured for contextal') + lua_util.disable_module(N, 'config') + return + end +else + if not settings.submit_url then + settings.submit_url = set_url_path(settings.base_url, 'api/v1/submit') + end + if not settings.actions_url then + settings.actions_url = set_url_path(settings.base_url, 'api/v1/actions/') + end +end + +redis_params = lua_redis.parse_redis_server(N) +if redis_params then + cache_context = redis_cache.create_cache_context(redis_params, { + cache_prefix = settings.cache_prefix, + cache_ttl = settings.cache_ttl, + cache_format = 'json', + cache_use_hashing = false + }) +end + +local submission_id = rspamd_config:register_symbol({ + name = settings.submission_symbol, + type = 'normal', + group = N, + callback = submit_cb +}) + +local top_options = rspamd_config:get_all_opt('options') +if settings.request_ttl and settings.request_ttl >= (top_options.task_timeout * 0.8) then + rspamd_logger.warn(rspamd_config, [[request ttl is >= 80% of task timeout, won't wait on processing]]) + use_request_ttl = false +elseif not settings.request_ttl then + use_request_ttl = false +end + +local parent_id +if use_request_ttl then + parent_id = submission_id +else + parent_id = rspamd_config:register_symbol({ + name = settings.action_symbol_prefix, + type = 'postfilter', + priority = lua_util.symbols_priorities.high - 1, + group = N, + callback = action_cb + }) +end + +for k in pairs(contextal_actions) do + rspamd_config:register_symbol({ + name = settings.action_symbol_prefix .. '_' .. k, + parent = parent_id, + type = 'virtual', + group = N, + }) +end diff --git a/src/plugins/lua/gpt.lua b/src/plugins/lua/gpt.lua index 98a3e38ee..5d1cf5e06 100644 --- a/src/plugins/lua/gpt.lua +++ b/src/plugins/lua/gpt.lua @@ -494,6 +494,7 @@ local function insert_results(task, result, sel_part) rspamd_logger.errx(task, 'no probability in result') return end + if result.probability > 0.5 then task:insert_result('GPT_SPAM', (result.probability - 0.5) * 2, tostring(result.probability)) if settings.autolearn then @@ -504,10 +505,6 @@ local function insert_results(task, result, sel_part) process_categories(task, result.categories) end else - if result.reason and settings.reason_header then - lua_mime.modify_headers(task, - { add = { [settings.reason_header] = { value = 'value', order = 1 } } }) - end task:insert_result('GPT_HAM', (0.5 - result.probability) * 2, tostring(result.probability)) if settings.autolearn then task:set_flag("learn_ham") @@ -516,6 +513,10 @@ local function insert_results(task, result, sel_part) process_categories(task, result.categories) end end + if result.reason and settings.reason_header then + lua_mime.modify_headers(task, + { add = { [settings.reason_header] = { value = tostring(result.reason), order = 1 } } }) + end if cache_context then lua_cache.cache_set(task, redis_cache_key(sel_part), result, cache_context) @@ -958,14 +959,14 @@ if opts then "FROM and url domains. Evaluate spam probability (0-1). " .. "Output ONLY 3 lines:\n" .. "1. Numeric score (0.00-1.00)\n" .. - "2. One-sentence reason citing strongest red flag\n" .. + "2. One-sentence reason citing whether it is spam, the strongest red flag, or why it is ham\n" .. "3. Primary concern category if found from the list: " .. table.concat(lua_util.keys(categories_map), ', ') else settings.prompt = "Analyze this email strictly as a spam detector given the email message, subject, " .. "FROM and url domains. Evaluate spam probability (0-1). " .. "Output ONLY 2 lines:\n" .. "1. Numeric score (0.00-1.00)\n" .. - "2. One-sentence reason citing strongest red flag\n" + "2. One-sentence reason citing whether it is spam, the strongest red flag, or why it is ham\n" end end end diff --git a/src/plugins/lua/hfilter.lua b/src/plugins/lua/hfilter.lua index 6bc011b83..a783565ab 100644 --- a/src/plugins/lua/hfilter.lua +++ b/src/plugins/lua/hfilter.lua @@ -199,9 +199,10 @@ local function check_regexp(str, regexp_text) return re:match(str) end -local function add_static_map(data) +local function add_static_map(data, description) return rspamd_config:add_map { type = 'regexp_multi', + description = description, url = { upstreams = 'static', data = data, @@ -568,16 +569,16 @@ local function append_t(t, a) end end if config['helo_enabled'] then - checks_hello_bareip_map = add_static_map(checks_hello_bareip) - checks_hello_badip_map = add_static_map(checks_hello_badip) - checks_hellohost_map = add_static_map(checks_hellohost) - checks_hello_map = add_static_map(checks_hello) + checks_hello_bareip_map = add_static_map(checks_hello_bareip, 'Hfilter: HELO bare ip') + checks_hello_badip_map = add_static_map(checks_hello_badip, 'Hfilter: HELO bad ip') + checks_hellohost_map = add_static_map(checks_hellohost, 'Hfilter: HELO host') + checks_hello_map = add_static_map(checks_hello, 'Hfilter: HELO') append_t(symbols_enabled, symbols_helo) timeout = math.max(timeout, rspamd_config:get_dns_timeout() * 3) end if config['hostname_enabled'] then if not checks_hellohost_map then - checks_hellohost_map = add_static_map(checks_hellohost) + checks_hellohost_map = add_static_map(checks_hellohost, 'Hfilter: HOSTNAME') end append_t(symbols_enabled, symbols_hostname) timeout = math.max(timeout, rspamd_config:get_dns_timeout()) diff --git a/src/rspamadm/lua_repl.c b/src/rspamadm/lua_repl.c index 1d6da5aa9..41a319de9 100644 --- a/src/rspamadm/lua_repl.c +++ b/src/rspamadm/lua_repl.c @@ -232,7 +232,6 @@ rspamadm_exec_input(lua_State *L, const char *input) int i, cbref; int top = 0; char outbuf[8192]; - struct lua_logger_trace tr; struct thread_entry *thread = lua_thread_pool_get_for_config(rspamd_main->cfg); L = thread->lua_state; @@ -272,8 +271,7 @@ rspamadm_exec_input(lua_State *L, const char *input) rspamd_printf("local function: %d\n", cbref); } else { - memset(&tr, 0, sizeof(tr)); - lua_logger_out_type(L, i, outbuf, sizeof(outbuf) - 1, &tr, + lua_logger_out(L, i, outbuf, sizeof(outbuf), LUA_ESCAPE_UNPRINTABLE); rspamd_printf("%s\n", outbuf); } @@ -393,7 +391,6 @@ rspamadm_lua_message_handler(lua_State *L, int argc, char **argv) gpointer map; gsize len; char outbuf[8192]; - struct lua_logger_trace tr; if (argv[1] == NULL) { rspamd_printf("no callback is specified\n"); @@ -455,8 +452,7 @@ rspamadm_lua_message_handler(lua_State *L, int argc, char **argv) rspamd_printf("lua callback for %s returned:\n", argv[i]); for (j = old_top + 1; j <= lua_gettop(L); j++) { - memset(&tr, 0, sizeof(tr)); - lua_logger_out_type(L, j, outbuf, sizeof(outbuf), &tr, + lua_logger_out(L, j, outbuf, sizeof(outbuf), LUA_ESCAPE_UNPRINTABLE); rspamd_printf("%s\n", outbuf); } |