aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/chartable.cxx24
-rw-r--r--src/plugins/fuzzy_check.c178
-rw-r--r--src/plugins/lua/arc.lua204
-rw-r--r--src/plugins/lua/bayes_expiry.lua182
-rw-r--r--src/plugins/lua/contextal.lua2
-rw-r--r--src/plugins/lua/fuzzy_collect.lua2
-rw-r--r--src/plugins/lua/gpt.lua77
-rw-r--r--src/plugins/lua/hfilter.lua1
-rw-r--r--src/plugins/lua/history_redis.lua6
-rw-r--r--src/plugins/lua/known_senders.lua62
-rw-r--r--src/plugins/lua/milter_headers.lua2
-rw-r--r--src/plugins/lua/mime_types.lua6
-rw-r--r--src/plugins/lua/multimap.lua826
-rw-r--r--src/plugins/lua/ratelimit.lua6
-rw-r--r--src/plugins/lua/rbl.lua2
-rw-r--r--src/plugins/lua/replies.lua26
-rw-r--r--src/plugins/lua/reputation.lua113
-rw-r--r--src/plugins/lua/settings.lua6
-rw-r--r--src/plugins/lua/spamassassin.lua65
-rw-r--r--src/plugins/lua/trie.lua12
-rw-r--r--src/plugins/regexp.c63
21 files changed, 1529 insertions, 336 deletions
diff --git a/src/plugins/chartable.cxx b/src/plugins/chartable.cxx
index a5c7cb899..c82748862 100644
--- a/src/plugins/chartable.cxx
+++ b/src/plugins/chartable.cxx
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1696,7 +1696,7 @@ rspamd_can_alias_latin(int ch)
static double
rspamd_chartable_process_word_utf(struct rspamd_task *task,
- rspamd_stat_token_t *w,
+ rspamd_word_t *w,
gboolean is_url,
unsigned int *ncap,
struct chartable_ctx *chartable_module_ctx,
@@ -1842,7 +1842,7 @@ rspamd_chartable_process_word_utf(struct rspamd_task *task,
static double
rspamd_chartable_process_word_ascii(struct rspamd_task *task,
- rspamd_stat_token_t *w,
+ rspamd_word_t *w,
gboolean is_url,
struct chartable_ctx *chartable_module_ctx)
{
@@ -1931,17 +1931,17 @@ rspamd_chartable_process_part(struct rspamd_task *task,
struct chartable_ctx *chartable_module_ctx,
gboolean ignore_diacritics)
{
- rspamd_stat_token_t *w;
+ rspamd_word_t *w;
unsigned int i, ncap = 0;
double cur_score = 0.0;
- if (part == nullptr || part->utf_words == nullptr ||
- part->utf_words->len == 0 || part->nwords == 0) {
+ if (part == nullptr || part->utf_words.a == nullptr ||
+ kv_size(part->utf_words) == 0 || part->nwords == 0) {
return FALSE;
}
- for (i = 0; i < part->utf_words->len; i++) {
- w = &g_array_index(part->utf_words, rspamd_stat_token_t, i);
+ for (i = 0; i < kv_size(part->utf_words); i++) {
+ w = &kv_A(part->utf_words, i);
if ((w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT)) {
@@ -2015,13 +2015,13 @@ chartable_symbol_callback(struct rspamd_task *task,
ignore_diacritics = TRUE;
}
- if (task->meta_words != nullptr && task->meta_words->len > 0) {
- rspamd_stat_token_t *w;
+ if (task->meta_words.a && kv_size(task->meta_words) > 0) {
+ rspamd_word_t *w;
double cur_score = 0;
- gsize arlen = task->meta_words->len;
+ gsize arlen = kv_size(task->meta_words);
for (i = 0; i < arlen; i++) {
- w = &g_array_index(task->meta_words, rspamd_stat_token_t, i);
+ w = &kv_A(task->meta_words, i);
cur_score += rspamd_chartable_process_word_utf(task, w, FALSE,
nullptr, chartable_module_ctx, ignore_diacritics);
}
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index ece9a91e0..7dd5162ac 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -78,7 +78,8 @@ enum fuzzy_rule_mode {
};
struct fuzzy_rule {
- struct upstream_list *servers;
+ struct upstream_list *read_servers; /* Servers for read operations */
+ struct upstream_list *write_servers; /* Servers for write operations */
const char *symbol;
const char *algorithm_str;
const char *name;
@@ -543,22 +544,68 @@ fuzzy_parse_rule(struct rspamd_config *cfg, const ucl_object_t *obj,
}
if ((value = ucl_object_lookup(obj, "servers")) != NULL) {
- rule->servers = rspamd_upstreams_create(cfg->ups_ctx);
- /* pass max_error and revive_time configuration in upstream for fuzzy storage
- * it allows to configure error_rate threshold and upstream dead timer
- */
- rspamd_upstreams_set_limits(rule->servers,
+ rule->read_servers = rspamd_upstreams_create(cfg->ups_ctx);
+ rspamd_upstreams_set_limits(rule->read_servers,
(double) fuzzy_module_ctx->revive_time, NAN, NAN, NAN,
(unsigned int) fuzzy_module_ctx->max_errors, 0);
rspamd_mempool_add_destructor(cfg->cfg_pool,
(rspamd_mempool_destruct_t) rspamd_upstreams_destroy,
- rule->servers);
- if (!rspamd_upstreams_from_ucl(rule->servers, value, DEFAULT_PORT, NULL)) {
+ rule->read_servers);
+ if (!rspamd_upstreams_from_ucl(rule->read_servers, value, DEFAULT_PORT, NULL)) {
msg_err_config("cannot read servers definition");
return -1;
}
+
+ rule->write_servers = rule->read_servers;
+ }
+ else {
+ /* Check for read_servers and write_servers */
+ gboolean has_read = FALSE, has_write = FALSE;
+
+ if ((value = ucl_object_lookup(obj, "read_servers")) != NULL) {
+ rule->read_servers = rspamd_upstreams_create(cfg->ups_ctx);
+ rspamd_upstreams_set_limits(rule->read_servers,
+ (double) fuzzy_module_ctx->revive_time, NAN, NAN, NAN,
+ (unsigned int) fuzzy_module_ctx->max_errors, 0);
+
+ rspamd_mempool_add_destructor(cfg->cfg_pool,
+ (rspamd_mempool_destruct_t) rspamd_upstreams_destroy,
+ rule->read_servers);
+ if (!rspamd_upstreams_from_ucl(rule->read_servers, value, DEFAULT_PORT, NULL)) {
+ msg_err_config("cannot read read_servers definition");
+ return -1;
+ }
+ has_read = TRUE;
+ }
+
+ if ((value = ucl_object_lookup(obj, "write_servers")) != NULL) {
+ rule->write_servers = rspamd_upstreams_create(cfg->ups_ctx);
+ rspamd_upstreams_set_limits(rule->write_servers,
+ (double) fuzzy_module_ctx->revive_time, NAN, NAN, NAN,
+ (unsigned int) fuzzy_module_ctx->max_errors, 0);
+
+ rspamd_mempool_add_destructor(cfg->cfg_pool,
+ (rspamd_mempool_destruct_t) rspamd_upstreams_destroy,
+ rule->write_servers);
+ if (!rspamd_upstreams_from_ucl(rule->write_servers, value, DEFAULT_PORT, NULL)) {
+ msg_err_config("cannot read write_servers definition");
+ return -1;
+ }
+ has_write = TRUE;
+ }
+
+ /* If we have both read and write servers, we don't need the common servers list */
+ if (has_read && !has_write) {
+ /* Use read_servers for all operations */
+ rule->write_servers = rule->read_servers;
+ }
+ else if (has_write && !has_read) {
+ /* Use write_servers for all operations */
+ rule->read_servers = rule->write_servers;
+ }
}
+
if ((value = ucl_object_lookup(obj, "fuzzy_map")) != NULL) {
it = NULL;
while ((cur = ucl_object_iterate(value, &it, true)) != NULL) {
@@ -636,7 +683,7 @@ fuzzy_parse_rule(struct rspamd_config *cfg, const ucl_object_t *obj,
strlen(shingles_key_str), NULL, 0);
rule->shingles_key->len = 16;
- if (rspamd_upstreams_count(rule->servers) == 0) {
+ if (rspamd_upstreams_count(rule->read_servers) == 0) {
msg_err_config("no servers defined for fuzzy rule with name: %s",
rule->name);
return -1;
@@ -898,6 +945,24 @@ int fuzzy_check_module_init(struct rspamd_config *cfg, struct module_ctx **ctx)
0);
rspamd_rcl_add_doc_by_path(cfg,
"fuzzy_check.rule",
+ "List of servers to check (read-only operations)",
+ "read_servers",
+ UCL_STRING,
+ NULL,
+ 0,
+ NULL,
+ 0);
+ rspamd_rcl_add_doc_by_path(cfg,
+ "fuzzy_check.rule",
+ "List of servers to learn (write operations)",
+ "write_servers",
+ UCL_STRING,
+ NULL,
+ 0,
+ NULL,
+ 0);
+ rspamd_rcl_add_doc_by_path(cfg,
+ "fuzzy_check.rule",
"If true then never try to learn this fuzzy storage",
"read_only",
UCL_BOOLEAN,
@@ -1249,7 +1314,7 @@ int fuzzy_check_module_config(struct rspamd_config *cfg, bool validate)
LL_FOREACH(value, cur)
{
- if (ucl_object_lookup(cur, "servers")) {
+ if (ucl_object_lookup_any(cur, "servers", "read_servers", "write_servers", NULL) != NULL) {
/* Unnamed rule */
fuzzy_parse_rule(cfg, cur, NULL, cb_id);
nrules++;
@@ -1366,10 +1431,10 @@ fuzzy_io_fin(void *ud)
close(session->fd);
}
-static GArray *
+static rspamd_words_t *
fuzzy_preprocess_words(struct rspamd_mime_text_part *part, rspamd_mempool_t *pool)
{
- return part->utf_words;
+ return &part->utf_words;
}
static void
@@ -1715,26 +1780,30 @@ fuzzy_cmd_write_extensions(struct rspamd_task *task,
struct rspamd_email_address *addr = g_ptr_array_index(MESSAGE_FIELD(task,
from_mime),
0);
- unsigned int to_write = MIN(MAX_FUZZY_DOMAIN, addr->domain_len) + 2;
- if (to_write > 0 && to_write <= available) {
- *dest++ = RSPAMD_FUZZY_EXT_SOURCE_DOMAIN;
- *dest++ = to_write - 2;
+ if (addr->domain_len > 0) {
+ /* Filter invalid domains */
+ unsigned int to_write = MIN(MAX_FUZZY_DOMAIN, addr->domain_len) + 2;
- if (addr->domain_len < MAX_FUZZY_DOMAIN) {
- memcpy(dest, addr->domain, addr->domain_len);
- dest += addr->domain_len;
- }
- else {
- /* Trim from left */
- memcpy(dest,
- addr->domain + (addr->domain_len - MAX_FUZZY_DOMAIN),
- MAX_FUZZY_DOMAIN);
- dest += MAX_FUZZY_DOMAIN;
- }
+ if (to_write > 0 && to_write <= available) {
+ *dest++ = RSPAMD_FUZZY_EXT_SOURCE_DOMAIN;
+ *dest++ = to_write - 2;
+
+ if (addr->domain_len < MAX_FUZZY_DOMAIN) {
+ memcpy(dest, addr->domain, addr->domain_len);
+ dest += addr->domain_len;
+ }
+ else {
+ /* Trim from left */
+ memcpy(dest,
+ addr->domain + (addr->domain_len - MAX_FUZZY_DOMAIN),
+ MAX_FUZZY_DOMAIN);
+ dest += MAX_FUZZY_DOMAIN;
+ }
- available -= to_write;
- written += to_write;
+ available -= to_write;
+ written += to_write;
+ }
}
}
@@ -1792,7 +1861,7 @@ fuzzy_cmd_from_text_part(struct rspamd_task *task,
unsigned int i;
rspamd_cryptobox_hash_state_t st;
rspamd_stat_token_t *word;
- GArray *words;
+ rspamd_words_t *words;
struct fuzzy_cmd_io *io;
unsigned int additional_length;
unsigned char *additional_data;
@@ -1901,10 +1970,10 @@ fuzzy_cmd_from_text_part(struct rspamd_task *task,
rspamd_cryptobox_hash_init(&st, rule->hash_key->str, rule->hash_key->len);
words = fuzzy_preprocess_words(part, task->task_pool);
- for (i = 0; i < words->len; i++) {
- word = &g_array_index(words, rspamd_stat_token_t, i);
+ for (i = 0; i < kv_size(*words); i++) {
+ word = &kv_A(*words, i);
- if (!((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) || word->stemmed.len == 0)) {
+ if (!((word->flags & RSPAMD_WORD_FLAG_SKIPPED) || word->stemmed.len == 0)) {
rspamd_cryptobox_hash_update(&st, word->stemmed.begin,
word->stemmed.len);
}
@@ -2615,7 +2684,7 @@ fuzzy_insert_metric_results(struct rspamd_task *task, struct fuzzy_rule *rule,
if (task->message) {
PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, tp)
{
- if (!IS_TEXT_PART_EMPTY(tp) && tp->utf_words != NULL && tp->utf_words->len > 0) {
+ if (!IS_TEXT_PART_EMPTY(tp) && kv_size(tp->utf_words) > 0) {
seen_text_part = TRUE;
if (tp->utf_stripped_text.magic == UTEXT_MAGIC) {
@@ -3394,8 +3463,8 @@ register_fuzzy_client_call(struct rspamd_task *task,
int sock;
if (!rspamd_session_blocked(task->s)) {
- /* Get upstream */
- selected = rspamd_upstream_get(rule->servers, RSPAMD_UPSTREAM_ROUND_ROBIN,
+ /* Get upstream - use read_servers for check operations */
+ selected = rspamd_upstream_get(rule->read_servers, RSPAMD_UPSTREAM_ROUND_ROBIN,
NULL, 0);
if (selected) {
addr = rspamd_upstream_addr_next(selected);
@@ -3522,9 +3591,8 @@ register_fuzzy_controller_call(struct rspamd_http_connection_entry *entry,
int sock;
int ret = -1;
- /* Get upstream */
-
- while ((selected = rspamd_upstream_get_forced(rule->servers,
+ /* Get upstream - use write_servers for learn/unlearn operations */
+ while ((selected = rspamd_upstream_get_forced(rule->write_servers,
RSPAMD_UPSTREAM_SEQUENTIAL, NULL, 0))) {
/* Create UDP socket */
addr = rspamd_upstream_addr_next(selected);
@@ -3538,6 +3606,9 @@ register_fuzzy_controller_call(struct rspamd_http_connection_entry *entry,
rspamd_upstream_fail(selected, TRUE, strerror(errno));
}
else {
+ msg_info_task("fuzzy storage %s (%s rule) is used for write",
+ rspamd_inet_address_to_string_pretty(addr),
+ rule->name);
s =
rspamd_mempool_alloc0(session->pool,
sizeof(struct fuzzy_learn_session));
@@ -3620,6 +3691,7 @@ fuzzy_modify_handler(struct rspamd_http_connection_entry *conn_ent,
PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
{
if (rule->mode == fuzzy_rule_read_only) {
+ msg_debug_task("skip rule %s as it is read-only", rule->name);
continue;
}
@@ -3729,6 +3801,8 @@ fuzzy_modify_handler(struct rspamd_http_connection_entry *conn_ent,
else {
commands = fuzzy_generate_commands(task, rule, cmd, flag, value,
flags);
+ msg_debug_task("fuzzy command %d for rule %s, flag %d, value %d",
+ cmd, rule->name, flag, value);
if (commands != NULL) {
res = register_fuzzy_controller_call(conn_ent,
rule,
@@ -3894,7 +3968,7 @@ fuzzy_check_send_lua_learn(struct fuzzy_rule *rule,
/* Get upstream */
if (!rspamd_session_blocked(task->s)) {
- while ((selected = rspamd_upstream_get(rule->servers,
+ while ((selected = rspamd_upstream_get(rule->write_servers,
RSPAMD_UPSTREAM_SEQUENTIAL, NULL, 0))) {
/* Create UDP socket */
addr = rspamd_upstream_addr_next(selected);
@@ -4491,9 +4565,21 @@ fuzzy_lua_list_storages(lua_State *L)
lua_setfield(L, -2, "read_only");
/* Push servers */
- lua_createtable(L, rspamd_upstreams_count(rule->servers), 0);
- rspamd_upstreams_foreach(rule->servers, lua_upstream_str_inserter, L);
- lua_setfield(L, -2, "servers");
+ if (rule->read_servers == rule->write_servers) {
+ /* Same servers for both operations */
+ lua_createtable(L, rspamd_upstreams_count(rule->read_servers), 0);
+ rspamd_upstreams_foreach(rule->read_servers, lua_upstream_str_inserter, L);
+ lua_setfield(L, -2, "servers");
+ }
+ else {
+ /* Different servers for read and write */
+ lua_createtable(L, rspamd_upstreams_count(rule->read_servers), 0);
+ rspamd_upstreams_foreach(rule->read_servers, lua_upstream_str_inserter, L);
+ lua_setfield(L, -2, "read_servers");
+ lua_createtable(L, rspamd_upstreams_count(rule->write_servers), 0);
+ rspamd_upstreams_foreach(rule->write_servers, lua_upstream_str_inserter, L);
+ lua_setfield(L, -2, "write_servers");
+ }
/* Push flags */
GHashTableIter it;
@@ -4780,7 +4866,7 @@ fuzzy_lua_ping_storage(lua_State *L)
rspamd_ptr_array_free_hard, addrs);
}
else {
- struct upstream *selected = rspamd_upstream_get(rule_found->servers,
+ struct upstream *selected = rspamd_upstream_get(rule_found->read_servers,
RSPAMD_UPSTREAM_ROUND_ROBIN, NULL, 0);
addr = rspamd_upstream_addr_next(selected);
}
@@ -4824,4 +4910,4 @@ fuzzy_lua_ping_storage(lua_State *L)
lua_pushboolean(L, TRUE);
return 1;
-} \ No newline at end of file
+}
diff --git a/src/plugins/lua/arc.lua b/src/plugins/lua/arc.lua
index fb5dd93e6..954583ed0 100644
--- a/src/plugins/lua/arc.lua
+++ b/src/plugins/lua/arc.lua
@@ -72,12 +72,13 @@ local settings = {
use_domain = 'header',
use_esld = true,
use_redis = false,
- key_prefix = 'arc_keys', -- default hash name
- reuse_auth_results = false, -- Reuse the existing authentication results
+ key_prefix = 'arc_keys', -- default hash name
+ reuse_auth_results = false, -- Reuse the existing authentication results
whitelisted_signers_map = nil, -- Trusted signers domains
- adjust_dmarc = true, -- Adjust DMARC rejected policy for trusted forwarders
- allowed_ids = nil, -- Allowed settings id
- forbidden_ids = nil, -- Banned settings id
+ whitelist = nil, -- Domains with broken ARC implementations to trust despite validation failures
+ adjust_dmarc = true, -- Adjust DMARC rejected policy for trusted forwarders
+ allowed_ids = nil, -- Allowed settings id
+ forbidden_ids = nil, -- Banned settings id
}
-- To match normal AR
@@ -86,15 +87,15 @@ local ar_settings = lua_auth_results.default_settings
local function parse_arc_header(hdr, target, is_aar)
-- Split elements by ';' and trim spaces
local arr = fun.totable(fun.map(
- function(val)
- return fun.totable(fun.map(lua_util.rspamd_str_trim,
- fun.filter(function(v)
- return v and #v > 0
- end,
- lua_util.rspamd_str_split(val.decoded, ';')
- )
- ))
- end, hdr
+ function(val)
+ return fun.totable(fun.map(lua_util.rspamd_str_trim,
+ fun.filter(function(v)
+ return v and #v > 0
+ end,
+ lua_util.rspamd_str_split(val.decoded, ';')
+ )
+ ))
+ end, hdr
))
-- v[1] is the key and v[2] is the value
@@ -115,11 +116,11 @@ local function parse_arc_header(hdr, target, is_aar)
if not is_aar then
-- For normal ARC headers we split by kv pair, like k=v
fun.each(function(v)
- fill_arc_header_table(v, target[i])
- end,
- fun.map(function(elt)
- return lua_util.rspamd_str_split(elt, '=')
- end, elts)
+ fill_arc_header_table(v, target[i])
+ end,
+ fun.map(function(elt)
+ return lua_util.rspamd_str_split(elt, '=')
+ end, elts)
)
else
-- For AAR we check special case of i=%d and pass everything else to
@@ -147,7 +148,7 @@ local function parse_arc_header(hdr, target, is_aar)
-- sort by i= attribute
table.sort(target, function(a, b)
- return (a.i or 0) < (b.i or 0)
+ return (tonumber(a.i) or 0) < (tonumber(b.i) or 0)
end)
end
@@ -156,14 +157,14 @@ local function arc_validate_seals(task, seals, sigs, seal_headers, sig_headers)
for i = 1, #seals do
if (sigs[i].i or 0) ~= i then
fail_reason = string.format('bad i for signature: %d, expected %d; d=%s',
- sigs[i].i, i, sigs[i].d)
+ sigs[i].i, i, sigs[i].d)
rspamd_logger.infox(task, fail_reason)
task:insert_result(arc_symbols['invalid'], 1.0, fail_reason)
return false, fail_reason
end
if (seals[i].i or 0) ~= i then
fail_reason = string.format('bad i for seal: %d, expected %d; d=%s',
- seals[i].i, i, seals[i].d)
+ seals[i].i, i, seals[i].d)
rspamd_logger.infox(task, fail_reason)
task:insert_result(arc_symbols['invalid'], 1.0, fail_reason)
return false, fail_reason
@@ -207,7 +208,7 @@ local function arc_callback(task)
if #arc_sig_headers ~= #arc_seal_headers then
-- We mandate that count of seals is equal to count of signatures
rspamd_logger.infox(task, 'number of seals (%s) is not equal to number of signatures (%s)',
- #arc_seal_headers, #arc_sig_headers)
+ #arc_seal_headers, #arc_sig_headers)
task:insert_result(arc_symbols['invalid'], 1.0, 'invalid count of seals and signatures')
return
end
@@ -249,7 +250,7 @@ local function arc_callback(task)
-- Now check sanity of what we have
local valid, validation_error = arc_validate_seals(task, cbdata.seals, cbdata.sigs,
- arc_seal_headers, arc_sig_headers)
+ arc_seal_headers, arc_sig_headers)
if not valid then
task:cache_set('arc-failure', validation_error)
return
@@ -267,12 +268,20 @@ local function arc_callback(task)
local function gen_arc_seal_cb(index, sig)
return function(_, res, err, domain)
lua_util.debugm(N, task, 'checked arc seal: %s(%s), %s processed',
- res, err, index)
+ res, err, index)
if not res then
- cbdata.res = 'fail'
- if err and domain then
- table.insert(cbdata.errors, string.format('sig:%s:%s', domain, err))
+ -- Check if this domain is whitelisted for broken ARC implementations
+ if settings.whitelist and domain and settings.whitelist:get_key(domain) then
+ rspamd_logger.infox(task, 'ARC seal validation failed for whitelisted domain %s, treating as valid: %s',
+ domain, err)
+ lua_util.debugm(N, task, 'whitelisted domain %s ARC seal failure ignored', domain)
+ res = true -- Treat as valid to continue the chain
+ else
+ cbdata.res = 'fail'
+ if err and domain then
+ table.insert(cbdata.errors, string.format('sig:%s:%s', domain, err))
+ end
end
end
@@ -283,7 +292,7 @@ local function arc_callback(task)
local cur_aar = cbdata.ars[index]
if not cur_aar then
rspamd_logger.warnx(task, "cannot find Arc-Authentication-Results for trusted " ..
- "forwarder %s on i=%s", domain, cbdata.index)
+ "forwarder %s on i=%s", domain, cbdata.index)
else
task:cache_set(AR_TRUSTED_CACHE_KEY, cur_aar)
local seen_dmarc
@@ -309,20 +318,20 @@ local function arc_callback(task)
end
end
task:insert_result(arc_symbols.trusted_allow, mult,
- string.format('%s:s=%s:i=%d', domain, sig.s, index))
+ string.format('%s:s=%s:i=%d', domain, sig.s, index))
end
end
if index == #arc_sig_headers then
if cbdata.res == 'success' then
local arc_allow_result = string.format('%s:s=%s:i=%d',
- domain, sig.s, index)
+ domain, sig.s, index)
task:insert_result(arc_symbols.allow, 1.0, arc_allow_result)
task:cache_set('arc-allow', arc_allow_result)
else
task:insert_result(arc_symbols.reject, 1.0,
- rspamd_logger.slog('seal check failed: %s, %s', cbdata.res,
- cbdata.errors))
+ rspamd_logger.slog('seal check failed: %s, %s', cbdata.res,
+ cbdata.errors))
end
end
end
@@ -330,12 +339,20 @@ local function arc_callback(task)
local function arc_signature_cb(_, res, err, domain)
lua_util.debugm(N, task, 'checked arc signature %s: %s(%s)',
- domain, res, err)
+ domain, res, err)
if not res then
- cbdata.res = 'fail'
- if err and domain then
- table.insert(cbdata.errors, string.format('sig:%s:%s', domain, err))
+ -- Check if this domain is whitelisted for broken ARC implementations
+ if settings.whitelist and domain and settings.whitelist:get_key(domain) then
+ rspamd_logger.infox(task, 'ARC signature validation failed for whitelisted domain %s, treating as valid: %s',
+ domain, err)
+ lua_util.debugm(N, task, 'whitelisted domain %s ARC signature failure ignored', domain)
+ res = true -- Treat as valid to continue the chain
+ else
+ cbdata.res = 'fail'
+ if err and domain then
+ table.insert(cbdata.errors, string.format('sig:%s:%s', domain, err))
+ end
end
end
if cbdata.res == 'success' then
@@ -343,17 +360,24 @@ local function arc_callback(task)
for i, sig in ipairs(cbdata.seals) do
local ret, lerr = dkim_verify(task, sig.header, gen_arc_seal_cb(i, sig), 'arc-seal')
if not ret then
- cbdata.res = 'fail'
- table.insert(cbdata.errors, string.format('seal:%s:s=%s:i=%s:%s',
+ -- Check if this domain is whitelisted for broken ARC implementations
+ if settings.whitelist and sig.d and settings.whitelist:get_key(sig.d) then
+ rspamd_logger.infox(task, 'ARC seal dkim_verify failed for whitelisted domain %s, treating as valid: %s',
+ sig.d, lerr)
+ lua_util.debugm(N, task, 'whitelisted domain %s ARC seal dkim_verify failure ignored', sig.d)
+ else
+ cbdata.res = 'fail'
+ table.insert(cbdata.errors, string.format('seal:%s:s=%s:i=%s:%s',
sig.d or '', sig.s or '', sig.i or '', lerr))
- lua_util.debugm(N, task, 'checked arc seal %s: %s(%s), %s processed',
+ lua_util.debugm(N, task, 'checked arc seal %s: %s(%s), %s processed',
sig.d, ret, lerr, i)
+ end
end
end
else
task:insert_result(arc_symbols['reject'], 1.0,
- rspamd_logger.slog('signature check failed: %s, %s', cbdata.res,
- cbdata.errors))
+ rspamd_logger.slog('signature check failed: %s, %s', cbdata.res,
+ cbdata.errors))
end
end
@@ -397,25 +421,33 @@ local function arc_callback(task)
is "fail" and the algorithm stops here.
9. If the algorithm reaches this step, then the Chain Validation
Status is "pass", and the algorithm is complete.
- ]]--
+ ]] --
local processed = 0
local sig = cbdata.sigs[#cbdata.sigs] -- last AMS
local ret, err = dkim_verify(task, sig.header, arc_signature_cb, 'arc-sign')
if not ret then
- cbdata.res = 'fail'
- table.insert(cbdata.errors, string.format('sig:%s:%s', sig.d or '', err))
+ -- Check if this domain is whitelisted for broken ARC implementations
+ if settings.whitelist and sig.d and settings.whitelist:get_key(sig.d) then
+ rspamd_logger.infox(task, 'ARC signature dkim_verify failed for whitelisted domain %s, treating as valid: %s',
+ sig.d, err)
+ lua_util.debugm(N, task, 'whitelisted domain %s ARC signature dkim_verify failure ignored', sig.d)
+ processed = processed + 1
+ else
+ cbdata.res = 'fail'
+ table.insert(cbdata.errors, string.format('sig:%s:%s', sig.d or '', err))
+ end
else
processed = processed + 1
lua_util.debugm(N, task, 'processed arc signature %s[%s]: %s(%s), %s total',
- sig.d, sig.i, ret, err, #cbdata.seals)
+ sig.d, sig.i, ret, err, #cbdata.seals)
end
if processed == 0 then
task:insert_result(arc_symbols['reject'], 1.0,
- rspamd_logger.slog('cannot verify %s of %s signatures: %s',
- #arc_sig_headers - processed, #arc_sig_headers, cbdata.errors))
+ rspamd_logger.slog('cannot verify %s of %s signatures: %s',
+ #arc_sig_headers - processed, #arc_sig_headers, cbdata.errors))
end
end
@@ -538,13 +570,13 @@ local function arc_sign_seal(task, params, header)
for i = 1, #arc_seals, 1 do
if arc_auth_results[i] then
local s = dkim_canonicalize('ARC-Authentication-Results',
- arc_auth_results[i].raw_header)
+ arc_auth_results[i].raw_header)
sha_ctx:update(s)
lua_util.debugm(N, task, 'update signature with header: %s', s)
end
if arc_sigs[i] then
local s = dkim_canonicalize('ARC-Message-Signature',
- arc_sigs[i].raw_header)
+ arc_sigs[i].raw_header)
sha_ctx:update(s)
lua_util.debugm(N, task, 'update signature with header: %s', s)
end
@@ -557,16 +589,16 @@ local function arc_sign_seal(task, params, header)
end
header = lua_util.fold_header(task,
- 'ARC-Message-Signature',
- header)
+ 'ARC-Message-Signature',
+ header)
cur_auth_results = string.format('i=%d; %s', cur_idx, cur_auth_results)
cur_auth_results = lua_util.fold_header(task,
- 'ARC-Authentication-Results',
- cur_auth_results, ';')
+ 'ARC-Authentication-Results',
+ cur_auth_results, ';')
local s = dkim_canonicalize('ARC-Authentication-Results',
- cur_auth_results)
+ cur_auth_results)
sha_ctx:update(s)
lua_util.debugm(N, task, 'update signature with header: %s', s)
s = dkim_canonicalize('ARC-Message-Signature', header)
@@ -574,10 +606,10 @@ local function arc_sign_seal(task, params, header)
lua_util.debugm(N, task, 'update signature with header: %s', s)
local cur_arc_seal = string.format('i=%d; s=%s; d=%s; t=%d; a=rsa-sha256; cv=%s; b=',
- cur_idx,
- params.selector,
- params.domain,
- math.floor(rspamd_util.get_time()), params.arc_cv)
+ cur_idx,
+ params.selector,
+ params.domain,
+ math.floor(rspamd_util.get_time()), params.arc_cv)
s = string.format('%s:%s', 'arc-seal', cur_arc_seal)
sha_ctx:update(s)
lua_util.debugm(N, task, 'initial update signature with header: %s', s)
@@ -591,20 +623,23 @@ local function arc_sign_seal(task, params, header)
local sig = rspamd_rsa.sign_memory(privkey, sha_ctx:bin())
cur_arc_seal = string.format('%s%s', cur_arc_seal,
- sig:base64(70, nl_type))
+ sig:base64(70, nl_type))
lua_mime.modify_headers(task, {
add = {
['ARC-Authentication-Results'] = { order = 1, value = cur_auth_results },
['ARC-Message-Signature'] = { order = 1, value = header },
- ['ARC-Seal'] = { order = 1, value = lua_util.fold_header(task,
- 'ARC-Seal', cur_arc_seal) }
+ ['ARC-Seal'] = {
+ order = 1,
+ value = lua_util.fold_header(task,
+ 'ARC-Seal', cur_arc_seal)
+ }
},
-- RFC requires a strict order for these headers to be inserted
order = { 'ARC-Authentication-Results', 'ARC-Message-Signature', 'ARC-Seal' },
})
task:insert_result(settings.sign_symbol, 1.0,
- string.format('%s:s=%s:i=%d', params.domain, params.selector, cur_idx))
+ string.format('%s:s=%s:i=%d', params.domain, params.selector, cur_idx))
end
local function prepare_arc_selector(task, sel)
@@ -668,7 +703,6 @@ local function prepare_arc_selector(task, sel)
else
default_arc_cv()
end
-
end
return true
@@ -695,19 +729,18 @@ local function do_sign(task, sign_params)
sign_params.pubkey = results[1]
sign_params.strict_pubkey_check = not settings.allow_pubkey_mismatch
elseif not settings.allow_pubkey_mismatch then
- rspamd_logger.errx('public key for domain %s/%s is not found: %s, skip signing',
- sign_params.domain, sign_params.selector, err)
+ rspamd_logger.errx(task, 'public key for domain %s/%s is not found: %s, skip signing',
+ sign_params.domain, sign_params.selector, err)
return
else
- rspamd_logger.infox('public key for domain %s/%s is not found: %s',
- sign_params.domain, sign_params.selector, err)
+ rspamd_logger.infox(task, 'public key for domain %s/%s is not found: %s',
+ sign_params.domain, sign_params.selector, err)
end
local dret, hdr = dkim_sign(task, sign_params)
if dret then
arc_sign_seal(task, sign_params, hdr)
end
-
end,
forced = true
})
@@ -768,6 +801,31 @@ end
dkim_sign_tools.process_signing_settings(N, settings, opts)
+-- Process ARC-specific maps that aren't handled by dkim_sign_tools
+local lua_maps = require "lua_maps"
+
+if opts.whitelisted_signers_map then
+ settings.whitelisted_signers_map = lua_maps.map_add_from_ucl(opts.whitelisted_signers_map, 'set',
+ 'ARC trusted signers domains')
+ if not settings.whitelisted_signers_map then
+ rspamd_logger.errx(rspamd_config, 'cannot load whitelisted_signers_map')
+ settings.whitelisted_signers_map = nil
+ else
+ rspamd_logger.infox(rspamd_config, 'loaded ARC whitelisted signers map')
+ end
+end
+
+if opts.whitelist then
+ settings.whitelist = lua_maps.map_add_from_ucl(opts.whitelist, 'set',
+ 'ARC domains with broken implementations')
+ if not settings.whitelist then
+ rspamd_logger.errx(rspamd_config, 'cannot load ARC whitelist map')
+ settings.whitelist = nil
+ else
+ rspamd_logger.infox(rspamd_config, 'loaded ARC whitelist map')
+ end
+end
+
if not dkim_sign_tools.validate_signing_settings(settings) then
rspamd_logger.infox(rspamd_config, 'mandatory parameters missing, disable arc signing')
return
@@ -780,7 +838,7 @@ if ar_opts and ar_opts.routines then
if routines['authentication-results'] then
ar_settings = lua_util.override_defaults(ar_settings,
- routines['authentication-results'])
+ routines['authentication-results'])
end
end
@@ -789,7 +847,7 @@ if settings.use_redis then
if not redis_params then
rspamd_logger.errx(rspamd_config, 'no servers are specified, ' ..
- 'but module is configured to load keys from redis, disable arc signing')
+ 'but module is configured to load keys from redis, disable arc signing')
return
end
@@ -845,9 +903,9 @@ if settings.adjust_dmarc and settings.whitelisted_signers_map then
local dmarc_fwd = ar.dmarc
if dmarc_fwd == 'pass' then
rspamd_logger.infox(task, "adjust dmarc reject score as trusted forwarder "
- .. "proved DMARC validity for %s", ar['header.from'])
+ .. "proved DMARC validity for %s", ar['header.from'])
task:adjust_result(sym_to_adjust, 0.1,
- 'ARC trusted')
+ 'ARC trusted')
end
end
end
diff --git a/src/plugins/lua/bayes_expiry.lua b/src/plugins/lua/bayes_expiry.lua
index 44ff9dafa..0d78f2272 100644
--- a/src/plugins/lua/bayes_expiry.lua
+++ b/src/plugins/lua/bayes_expiry.lua
@@ -41,32 +41,38 @@ local template = {}
local function check_redis_classifier(cls, cfg)
-- Skip old classifiers
if cls.new_schema then
- local symbol_spam, symbol_ham
+ local class_symbols = {}
+ local class_labels = {}
local expiry = (cls.expiry or cls.expire)
if type(expiry) == 'table' then
expiry = expiry[1]
end
- -- Load symbols from statfiles
+ -- Extract class_labels mapping from classifier config
+ if cls.class_labels then
+ class_labels = cls.class_labels
+ end
+ -- Load symbols from statfiles for multi-class support
local function check_statfile_table(tbl, def_sym)
local symbol = tbl.symbol or def_sym
-
- local spam
- if tbl.spam then
- spam = tbl.spam
- else
- if string.match(symbol:upper(), 'SPAM') then
- spam = true
+ local class_name = tbl.class
+
+ -- Handle legacy spam/ham detection for backward compatibility
+ if not class_name then
+ if tbl.spam ~= nil then
+ class_name = tbl.spam and 'spam' or 'ham'
+ elseif string.match(tostring(symbol):upper(), 'SPAM') then
+ class_name = 'spam'
+ elseif string.match(tostring(symbol):upper(), 'HAM') then
+ class_name = 'ham'
else
- spam = false
+ class_name = def_sym
end
end
- if spam then
- symbol_spam = symbol
- else
- symbol_ham = symbol
+ if class_name then
+ class_symbols[class_name] = symbol
end
end
@@ -87,10 +93,9 @@ local function check_redis_classifier(cls, cfg)
end
end
- if not symbol_spam or not symbol_ham or type(expiry) ~= 'number' then
+ if next(class_symbols) == nil or type(expiry) ~= 'number' then
logger.debugm(N, rspamd_config,
- 'disable expiry for classifier %s: no expiry %s',
- symbol_spam, cls)
+ 'disable expiry for classifier: no class symbols or expiry configured')
return
end
-- Now try to load redis_params if needed
@@ -108,17 +113,16 @@ local function check_redis_classifier(cls, cfg)
end
if redis_params['read_only'] then
- logger.infox(rspamd_config, 'disable expiry for classifier %s: read only redis configuration',
- symbol_spam)
+ logger.infox(rspamd_config, 'disable expiry for classifier: read only redis configuration')
return
end
- logger.debugm(N, rspamd_config, "enabled expiry for %s/%s -> %s expiry",
- symbol_spam, symbol_ham, expiry)
+ logger.debugm(N, rspamd_config, "enabled expiry for classes %s -> %s expiry",
+ table.concat(lutil.keys(class_symbols), ', '), expiry)
table.insert(settings.classifiers, {
- symbol_spam = symbol_spam,
- symbol_ham = symbol_ham,
+ class_symbols = class_symbols,
+ class_labels = class_labels,
redis_params = redis_params,
expiry = expiry
})
@@ -249,12 +253,11 @@ local expiry_script = [[
local keys = ret[2]
local tokens = {}
- -- Tokens occurrences distribution counters
+ -- Dynamic occurrence tracking for all classes
local occur = {
- ham = {},
- spam = {},
total = {}
}
+ local classes_found = {}
-- Expiry step statistics counters
local nelts, extended, discriminated, sum, sum_squares, common, significant,
@@ -264,24 +267,44 @@ local expiry_script = [[
for _,key in ipairs(keys) do
local t = redis.call('TYPE', key)["ok"]
if t == 'hash' then
- local values = redis.call('HMGET', key, 'H', 'S')
- local ham = tonumber(values[1]) or 0
- local spam = tonumber(values[2]) or 0
+ -- Get all hash fields to support multi-class
+ local hash_data = redis.call('HGETALL', key)
+ local class_counts = {}
+ local total = 0
local ttl = redis.call('TTL', key)
+
+ -- Parse hash data into class counts
+ for i = 1, #hash_data, 2 do
+ local class_label = hash_data[i]
+ local count = tonumber(hash_data[i + 1]) or 0
+ class_counts[class_label] = count
+ total = total + count
+
+ -- Track classes we've seen
+ if not classes_found[class_label] then
+ classes_found[class_label] = true
+ occur[class_label] = {}
+ end
+ end
+
tokens[key] = {
- ham,
- spam,
- ttl
+ class_counts = class_counts,
+ total = total,
+ ttl = ttl
}
- local total = spam + ham
+
sum = sum + total
sum_squares = sum_squares + total * total
nelts = nelts + 1
- for k,v in pairs({['ham']=ham, ['spam']=spam, ['total']=total}) do
- if tonumber(v) > 19 then v = 20 end
- occur[k][v] = occur[k][v] and occur[k][v] + 1 or 1
+ -- Update occurrence counters for all classes and total
+ for class_label, count in pairs(class_counts) do
+ local bucket = count > 19 and 20 or count
+ occur[class_label][bucket] = (occur[class_label][bucket] or 0) + 1
end
+
+ local total_bucket = total > 19 and 20 or total
+ occur.total[total_bucket] = (occur.total[total_bucket] or 0) + 1
end
end
@@ -293,9 +316,10 @@ local expiry_script = [[
end
for key,token in pairs(tokens) do
- local ham, spam, ttl = token[1], token[2], tonumber(token[3])
+ local class_counts = token.class_counts
+ local total = token.total
+ local ttl = tonumber(token.ttl)
local threshold = mean
- local total = spam + ham
local function set_ttl()
if expire < 0 then
@@ -310,14 +334,39 @@ local expiry_script = [[
return 0
end
- if total == 0 or math.abs(ham - spam) <= total * ${epsilon_common} then
+ -- Check if token is common (balanced across classes)
+ local is_common = false
+ if total == 0 then
+ is_common = true
+ else
+ -- For multi-class, check if any class dominates significantly
+ local max_count = 0
+ for _, count in pairs(class_counts) do
+ if count > max_count then
+ max_count = count
+ end
+ end
+ -- Token is common if no class has more than (1 - epsilon) of total
+ is_common = (max_count / total) <= (1 - ${epsilon_common})
+ end
+
+ if is_common then
common = common + 1
if ttl > ${common_ttl} then
discriminated = discriminated + 1
redis.call('EXPIRE', key, ${common_ttl})
end
elseif total >= threshold and total > 0 then
- if ham / total > ${significant_factor} or spam / total > ${significant_factor} then
+ -- Check if any class is significant
+ local is_significant = false
+ for _, count in pairs(class_counts) do
+ if count / total > ${significant_factor} then
+ is_significant = true
+ break
+ end
+ end
+
+ if is_significant then
significant = significant + 1
if ttl ~= -1 then
redis.call('PERSIST', key)
@@ -361,33 +410,50 @@ local expiry_script = [[
redis.call('DEL', lock_key)
local occ_distr = {}
- for _,cl in pairs({'ham', 'spam', 'total'}) do
+
+ -- Process all classes found plus total
+ local all_classes = {'total'}
+ for class_label in pairs(classes_found) do
+ table.insert(all_classes, class_label)
+ end
+
+ for _, cl in ipairs(all_classes) do
local occur_key = pattern_sha1 .. '_occurrence_' .. cl
if cursor ~= 0 then
- local n
- for i,v in ipairs(redis.call('HGETALL', occur_key)) do
- if i % 2 == 1 then
- n = tonumber(v)
- else
- occur[cl][n] = occur[cl][n] and occur[cl][n] + v or v
+ local existing_data = redis.call('HGETALL', occur_key)
+ if #existing_data > 0 then
+ for i = 1, #existing_data, 2 do
+ local bucket = tonumber(existing_data[i])
+ local count = tonumber(existing_data[i + 1])
+ if occur[cl] and occur[cl][bucket] then
+ occur[cl][bucket] = occur[cl][bucket] + count
+ elseif occur[cl] then
+ occur[cl][bucket] = count
+ end
end
end
- local str = ''
- if occur[cl][0] ~= nil then
- str = '0:' .. occur[cl][0] .. ','
- end
- for k,v in ipairs(occur[cl]) do
- if k == 20 then k = '>19' end
- str = str .. k .. ':' .. v .. ','
+ if occur[cl] and next(occur[cl]) then
+ local str = ''
+ if occur[cl][0] then
+ str = '0:' .. occur[cl][0] .. ','
+ end
+ for k = 1, 20 do
+ if occur[cl][k] then
+ local label = k == 20 and '>19' or tostring(k)
+ str = str .. label .. ':' .. occur[cl][k] .. ','
+ end
+ end
+ table.insert(occ_distr, cl .. '=' .. str)
+ else
+ table.insert(occ_distr, cl .. '=no_data')
end
- table.insert(occ_distr, str)
else
redis.call('DEL', occur_key)
end
- if next(occur[cl]) ~= nil then
+ if occur[cl] and next(occur[cl]) then
redis.call('HMSET', occur_key, unpack_function(hash2list(occur[cl])))
end
end
@@ -446,8 +512,8 @@ local function expire_step(cls, ev_base, worker)
'%s infrequent (%s %s), %s mean, %s std',
lutil.unpack(d))
if cycle then
- for i, cl in ipairs({ 'in ham', 'in spam', 'total' }) do
- logger.infox(rspamd_config, 'tokens occurrences, %s: {%s}', cl, occ_distr[i])
+ for _, distr_info in ipairs(occ_distr) do
+ logger.infox(rspamd_config, 'tokens occurrences: {%s}', distr_info)
end
end
end
diff --git a/src/plugins/lua/contextal.lua b/src/plugins/lua/contextal.lua
index 19b599e11..e29c21645 100644
--- a/src/plugins/lua/contextal.lua
+++ b/src/plugins/lua/contextal.lua
@@ -79,7 +79,7 @@ local wait_request_ttl = true
local function maybe_defer(task, obj)
if settings.defer_if_no_result and not ((obj or E)[1] or E).actions then
- task:set_pre_result('soft reject', settings.defer_message)
+ task:set_pre_result('soft reject', settings.defer_message, N)
end
end
diff --git a/src/plugins/lua/fuzzy_collect.lua b/src/plugins/lua/fuzzy_collect.lua
index 132ace90c..060cc2fc2 100644
--- a/src/plugins/lua/fuzzy_collect.lua
+++ b/src/plugins/lua/fuzzy_collect.lua
@@ -34,7 +34,7 @@ local settings = {
local function send_data_mirror(m, cfg, ev_base, body)
local function store_callback(err, _, _, _)
if err then
- rspamd_logger.errx(cfg, 'cannot save data on %(%s): %s', m.server, m.name, err)
+ rspamd_logger.errx(cfg, 'cannot save data on %s(%s): %s', m.server, m.name, err)
else
rspamd_logger.infox(cfg, 'saved data on %s(%s)', m.server, m.name)
end
diff --git a/src/plugins/lua/gpt.lua b/src/plugins/lua/gpt.lua
index 5d1cf5e06..331dbbce2 100644
--- a/src/plugins/lua/gpt.lua
+++ b/src/plugins/lua/gpt.lua
@@ -20,9 +20,9 @@ local E = {}
if confighelp then
rspamd_config:add_example(nil, 'gpt',
- "Performs postfiltering using GPT model",
- [[
-gpt {
+ "Performs postfiltering using GPT model",
+ [[
+ gpt {
# Supported types: openai, ollama
type = "openai";
# Your key to access the API
@@ -53,7 +53,7 @@ gpt {
reason_header = "X-GPT-Reason";
# Use JSON format for response
json = false;
-}
+ }
]])
return
end
@@ -162,7 +162,7 @@ local function default_condition(task)
end
end
lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s,
- sym.weight, required_weight)
+ sym.weight, required_weight)
else
return false, 'skip as "' .. s .. '" is found'
end
@@ -182,7 +182,7 @@ local function default_condition(task)
end
end
lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s,
- sym.weight, required_weight)
+ sym.weight, required_weight)
end
else
return false, 'skip as "' .. s .. '" is not found'
@@ -253,6 +253,15 @@ local function maybe_extract_json(str)
return nil
end
+-- Helper function to remove <think>...</think> and trim leading newlines
+local function clean_gpt_response(text)
+ -- Remove <think>...</think> including multiline
+ text = text:gsub("<think>.-</think>", "")
+ -- Trim leading whitespace and newlines
+ text = text:gsub("^%s*\n*", "")
+ return text
+end
+
local function default_openai_json_conversion(task, input)
local parser = ucl.parser()
local res, err = parser:parse_string(input)
@@ -301,7 +310,7 @@ local function default_openai_json_conversion(task, input)
elseif reply.probability == "low" then
spam_score = 0.1
else
- rspamd_logger.infox("cannot convert to spam probability: %s", reply.probability)
+ rspamd_logger.infox(task, "cannot convert to spam probability: %s", reply.probability)
end
end
@@ -349,17 +358,25 @@ local function default_openai_plain_conversion(task, input)
rspamd_logger.errx(task, 'no content in the first message')
return
end
+
+ -- Clean message
+ first_message = clean_gpt_response(first_message)
+
local lines = lua_util.str_split(first_message, '\n')
local first_line = clean_reply_line(lines[1])
local spam_score = tonumber(first_line)
local reason = clean_reply_line(lines[2])
local categories = lua_util.str_split(clean_reply_line(lines[3]), ',')
+ if type(reply.usage) == 'table' then
+ rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
+ end
+
if spam_score then
return spam_score, reason, categories
end
- rspamd_logger.errx(task, 'cannot parse plain gpt reply: %s (all: %s)', lines[1])
+ rspamd_logger.errx(task, 'cannot parse plain gpt reply: %s (all: %s)', lines[1], first_message)
return
end
@@ -387,6 +404,10 @@ local function default_ollama_plain_conversion(task, input)
rspamd_logger.errx(task, 'no content in the first message')
return
end
+
+ -- Clean message
+ first_message = clean_gpt_response(first_message)
+
local lines = lua_util.str_split(first_message, '\n')
local first_line = clean_reply_line(lines[1])
local spam_score = tonumber(first_line)
@@ -397,7 +418,7 @@ local function default_ollama_plain_conversion(task, input)
return spam_score, reason, categories
end
- rspamd_logger.errx(task, 'cannot parse plain gpt reply: %s', lines[1])
+ rspamd_logger.errx(task, 'cannot parse plain gpt reply: %s (all: %s)', lines[1], first_message)
return
end
@@ -449,7 +470,7 @@ local function default_ollama_json_conversion(task, input)
elseif reply.probability == "low" then
spam_score = 0.1
else
- rspamd_logger.infox("cannot convert to spam probability: %s", reply.probability)
+ rspamd_logger.infox(task, "cannot convert to spam probability: %s", reply.probability)
end
end
@@ -477,7 +498,7 @@ local function redis_cache_key(sel_part)
env_digest = digest:hex():sub(1, 4)
end
return string.format('%s_%s', env_digest,
- sel_part:get_mimepart():get_digest():sub(1, 24))
+ sel_part:get_mimepart():get_digest():sub(1, 24))
end
local function process_categories(task, categories)
@@ -514,9 +535,9 @@ local function insert_results(task, result, sel_part)
end
end
if result.reason and settings.reason_header then
- lua_mime.modify_headers(task,
- { add = { [settings.reason_header] = { value = tostring(result.reason), order = 1 } } })
- end
+ lua_mime.modify_headers(task,
+ { add = { [settings.reason_header] = { value = tostring(result.reason), order = 1 } } })
+ end
if cache_context then
lua_cache.cache_set(task, redis_cache_key(sel_part), result, cache_context)
@@ -540,12 +561,12 @@ local function check_consensus_and_insert_results(task, results, sel_part)
nspam = nspam + 1
max_spam_prob = math.max(max_spam_prob, result.probability)
lua_util.debugm(N, task, "model: %s; spam: %s; reason: '%s'",
- result.model, result.probability, result.reason)
+ result.model, result.probability, result.reason)
else
nham = nham + 1
max_ham_prob = math.min(max_ham_prob, result.probability)
lua_util.debugm(N, task, "model: %s; ham: %s; reason: '%s'",
- result.model, result.probability, result.reason)
+ result.model, result.probability, result.reason)
end
if result.reason then
@@ -559,23 +580,22 @@ local function check_consensus_and_insert_results(task, results, sel_part)
if nspam > nham and max_spam_prob > 0.75 then
insert_results(task, {
- probability = max_spam_prob,
- reason = reason.reason,
- categories = reason.categories,
- },
- sel_part)
+ probability = max_spam_prob,
+ reason = reason.reason,
+ categories = reason.categories,
+ },
+ sel_part)
elseif nham > nspam and max_ham_prob < 0.25 then
insert_results(task, {
- probability = max_ham_prob,
- reason = reason.reason,
- categories = reason.categories,
- },
- sel_part)
+ probability = max_ham_prob,
+ reason = reason.reason,
+ categories = reason.categories,
+ },
+ sel_part)
else
-- No consensus
lua_util.debugm(N, task, "no consensus")
end
-
end
local function get_meta_llm_content(task)
@@ -674,7 +694,7 @@ local function openai_check(task, content, sel_part)
},
{
role = 'user',
- content = 'Subject: ' .. task:get_subject() or '',
+ content = 'Subject: ' .. (task:get_subject() or ''),
},
{
role = 'user',
@@ -726,7 +746,6 @@ local function openai_check(task, content, sel_part)
if not rspamd_http.request(http_params) then
results[idx].checked = true
end
-
end
end
diff --git a/src/plugins/lua/hfilter.lua b/src/plugins/lua/hfilter.lua
index a783565ab..32102e4f8 100644
--- a/src/plugins/lua/hfilter.lua
+++ b/src/plugins/lua/hfilter.lua
@@ -131,6 +131,7 @@ local checks_hellohost = [[
/modem[.-][0-9]/i 5
/[0-9][.-]?dhcp/i 5
/wifi[.-][0-9]/i 5
+/[.-]vps[.-]/i 1
]]
local checks_hellohost_map
diff --git a/src/plugins/lua/history_redis.lua b/src/plugins/lua/history_redis.lua
index a3fdb0ec4..44eb40ad9 100644
--- a/src/plugins/lua/history_redis.lua
+++ b/src/plugins/lua/history_redis.lua
@@ -138,7 +138,7 @@ end
local function history_save(task)
local function redis_llen_cb(err, _)
if err then
- rspamd_logger.errx(task, 'got error %s when writing history row: %s',
+ rspamd_logger.errx(task, 'got error %s when writing history row',
err)
end
end
@@ -188,7 +188,7 @@ local function handle_history_request(task, conn, from, to, reset)
if reset then
local function redis_ltrim_cb(err, _)
if err then
- rspamd_logger.errx(task, 'got error %s when resetting history: %s',
+ rspamd_logger.errx(task, 'got error %s when resetting history',
err)
conn:send_error(504, '{"error": "' .. err .. '"}')
else
@@ -258,7 +258,7 @@ local function handle_history_request(task, conn, from, to, reset)
(rspamd_util:get_ticks() - t1) * 1000.0)
collectgarbage()
else
- rspamd_logger.errx(task, 'got error %s when getting history: %s',
+ rspamd_logger.errx(task, 'got error %s when getting history',
err)
conn:send_error(504, '{"error": "' .. err .. '"}')
end
diff --git a/src/plugins/lua/known_senders.lua b/src/plugins/lua/known_senders.lua
index 5cb2ddcf5..0cbf3cdcf 100644
--- a/src/plugins/lua/known_senders.lua
+++ b/src/plugins/lua/known_senders.lua
@@ -106,21 +106,26 @@ local function configure_scripts(_, _, _)
-- script checks if given recipients are in the local replies set of the sender
local redis_zscore_script = [[
local replies_recipients_addrs = ARGV
- if replies_recipients_addrs then
+ if replies_recipients_addrs and #replies_recipients_addrs > 0 then
+ local found = false
for _, rcpt in ipairs(replies_recipients_addrs) do
local score = redis.call('ZSCORE', KEYS[1], rcpt)
- -- check if score is nil (for some reason redis script does not see if score is a nil value)
- if type(score) == 'boolean' then
- score = nil
- -- 0 is stand for failure code
- return 0
+ if score then
+ -- If we found at least one recipient, consider it a match
+ found = true
+ break
end
end
- -- first number in return statement is stands for the success/failure code
- -- where success code is 1 and failure code is 0
- return 1
+
+ if found then
+ -- Success code is 1
+ return 1
+ else
+ -- Failure code is 0
+ return 0
+ end
else
- -- 0 is a failure code
+ -- No recipients to check, failure code is 0
return 0
end
]]
@@ -259,7 +264,13 @@ local function verify_local_replies_set(task)
return nil
end
- local replies_recipients = task:get_recipients('mime') or E
+ local replies_recipients = task:get_recipients('smtp') or E
+
+ -- If no recipients, don't proceed
+ if #replies_recipients == 0 then
+ lua_util.debugm(N, task, 'No recipients to verify')
+ return nil
+ end
local replies_sender_string = lua_util.maybe_obfuscate_string(tostring(replies_sender), settings,
settings.sender_prefix)
@@ -268,13 +279,16 @@ local function verify_local_replies_set(task)
local function redis_zscore_script_cb(err, data)
if err ~= nil then
rspamd_logger.errx(task, 'Could not verify %s local replies set %s', replies_sender_key, err)
- end
- if data ~= 1 then
- lua_util.debugm(N, task, 'Recipients were not verified')
return
end
- lua_util.debugm(N, task, 'Recipients were verified')
- task:insert_result(settings.symbol_check_mail_local, 1.0, replies_sender_key)
+
+ -- We need to ensure we're properly checking the result
+ if data == 1 then
+ lua_util.debugm(N, task, 'Recipients were verified')
+ task:insert_result(settings.symbol_check_mail_local, 1.0, replies_sender_key)
+ else
+ lua_util.debugm(N, task, 'Recipients were not verified, data=%s', data)
+ end
end
local replies_recipients_addrs = {}
@@ -284,12 +298,24 @@ local function verify_local_replies_set(task)
table.insert(replies_recipients_addrs, replies_recipients[i].addr)
end
- lua_util.debugm(N, task, 'Making redis request to local replies set')
- lua_redis.exec_redis_script(zscore_script_id,
+ -- Only proceed if we have recipients to check
+ if #replies_recipients_addrs == 0 then
+ lua_util.debugm(N, task, 'No recipient addresses to verify')
+ return nil
+ end
+
+ lua_util.debugm(N, task, 'Making redis request to local replies set with key %s and recipients %s',
+ replies_sender_key, table.concat(replies_recipients_addrs, ", "))
+
+ local ret = lua_redis.exec_redis_script(zscore_script_id,
{ task = task, is_write = true },
redis_zscore_script_cb,
{ replies_sender_key },
replies_recipients_addrs)
+
+ if not ret then
+ rspamd_logger.errx(task, "redis script request wasn't scheduled")
+ end
end
local function check_known_incoming_mail_callback(task)
diff --git a/src/plugins/lua/milter_headers.lua b/src/plugins/lua/milter_headers.lua
index 2daeeed78..17fc90562 100644
--- a/src/plugins/lua/milter_headers.lua
+++ b/src/plugins/lua/milter_headers.lua
@@ -138,7 +138,7 @@ local function milter_headers(task)
local function skip_wanted(hdr)
if settings_override then
- return true
+ return false
end
-- Normal checks
local function match_extended_headers_rcpt()
diff --git a/src/plugins/lua/mime_types.lua b/src/plugins/lua/mime_types.lua
index c69fa1e7b..73cd63c6a 100644
--- a/src/plugins/lua/mime_types.lua
+++ b/src/plugins/lua/mime_types.lua
@@ -128,6 +128,7 @@ local settings = {
inf = 4,
its = 4,
jnlp = 4,
+ ['library-ms'] = 4,
lnk = 4,
ksh = 4,
mad = 4,
@@ -179,6 +180,7 @@ local settings = {
reg = 4,
scf = 4,
scr = 4,
+ ['search-ms'] = 4,
shs = 4,
theme = 4,
url = 4,
@@ -406,9 +408,9 @@ local function check_mime_type(task)
local score2 = check_tables(ext2)
-- Check if detected extension match real extension
if detected_ext and detected_ext == ext then
- check_extension(score1, nil)
+ check_extension(score1, nil)
else
- check_extension(score1, score2)
+ check_extension(score1, score2)
end
-- Check for archive cloaking like .zip.gz
if settings['archive_extensions'][ext2]
diff --git a/src/plugins/lua/multimap.lua b/src/plugins/lua/multimap.lua
index b96c105b1..8bb62bef1 100644
--- a/src/plugins/lua/multimap.lua
+++ b/src/plugins/lua/multimap.lua
@@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-]]--
+]] --
if confighelp then
return
@@ -34,6 +34,16 @@ local redis_params
local fun = require "fun"
local N = 'multimap'
+-- SpamAssassin-like functionality
+local sa_atoms = {}
+local sa_scores = {}
+local sa_meta_rules = {}
+local sa_descriptions = {}
+
+-- Symbol state tracking for graceful map reloads
+-- States: 'available', 'loading', 'orphaned'
+local regexp_rules_symbol_states = {}
+
local multimap_grammar
-- Parse result in form: <symbol>:<score>|<symbol>|<score>
local function parse_multimap_value(parse_rule, p_ret)
@@ -54,7 +64,7 @@ local function parse_multimap_value(parse_rule, p_ret)
-- Matches: 55.97, -90.8, .9
number.decimal = (number.integer * -- Integer
(number.fractional ^ -1)) + -- Fractional
- (lpeg.S("+-") * number.fractional) -- Completely fractional number
+ (lpeg.S("+-") * number.fractional) -- Completely fractional number
local sym_start = lpeg.R("az", "AZ") + lpeg.S("_")
local sym_elt = sym_start + lpeg.R("09")
@@ -100,6 +110,607 @@ local function parse_multimap_value(parse_rule, p_ret)
return false, nil, 0.0, {}
end
+-- SpamAssassin-like line processing functions
+local function split_sa_line(str)
+ local result = {}
+ if not str then
+ return result
+ end
+
+ for token in string.gmatch(str, '%S+') do
+ table.insert(result, token)
+ end
+
+ return result
+end
+
+local function parse_sa_regexp(rule_symbol, re_expr)
+ -- Extract regexp and flags from /regexp/flags format
+ local re_str, flags = string.match(re_expr, '^/(.+)/([gimxsiu]*)$')
+ if not re_str then
+ re_str, flags = string.match(re_expr, '^m{(.+)}([gimxsiu]*)$')
+ end
+ if not re_str then
+ -- Try without delimiters
+ re_str = re_expr
+ flags = ''
+ end
+
+ if flags and flags ~= '' then
+ re_str = '(?' .. flags .. ')' .. re_str
+ end
+
+ local re = rspamd_regexp.create(re_str)
+ if not re then
+ rspamd_logger.errx(rspamd_config, 'cannot create regexp for %s: %s', rule_symbol, re_expr)
+ return nil
+ end
+
+ return re
+end
+
+local function words_to_sa_re(words, start)
+ return table.concat(fun.totable(fun.drop_n(start, words)), " ")
+end
+
+-- Helper function to create SA rule callbacks
+local function create_sa_atom_function(name, re, match_type, opts)
+ return function(task)
+ if not re then
+ rspamd_logger.errx(task, 're is missing for atom %s', name)
+ return 0
+ end
+
+ local function process_re_match(re_obj, tsk, re_type, header, strong)
+ local res = 0
+ if type(jit) == 'table' then
+ res = tsk:process_regexp(re_obj, re_type, header, strong)
+ else
+ res = tsk:process_regexp(re_obj, re_type, header, strong)
+ end
+ return res
+ end
+
+ local ret = 0
+
+ if match_type == 'header' then
+ ret = process_re_match(re, task, 'header', opts.header, opts.strong or false)
+ elseif match_type == 'body' then
+ ret = process_re_match(re, task, 'sabody')
+ elseif match_type == 'rawbody' then
+ ret = process_re_match(re, task, 'sarawbody')
+ elseif match_type == 'full' then
+ ret = process_re_match(re, task, 'body')
+ elseif match_type == 'uri' then
+ ret = process_re_match(re, task, 'url')
+ else
+ -- Default to body
+ ret = process_re_match(re, task, 'sabody')
+ end
+
+ if opts and opts.negate then
+ -- Negate the result for !~ operators
+ ret = (ret > 0) and 0 or 1
+ lua_util.debugm(N, task, 'SA atom %s negated result: %s', name, ret)
+ end
+
+ lua_util.debugm(N, task, 'SA atom %s result: %s', name, ret)
+ return ret
+ end
+end
+
+local function process_sa_line(rule, line)
+ line = lua_util.str_trim(line)
+
+ if string.len(line) == 0 or string.sub(line, 1, 1) == '#' then
+ return
+ end
+
+ -- Add debug logging
+ lua_util.debugm(N, rspamd_config, 'Processing SA line for rule %s: %s', rule.symbol, line)
+
+ local words = split_sa_line(line)
+ if not words or #words == 0 then
+ lua_util.debugm(N, rspamd_config, 'Skipping empty or invalid line: %s', line)
+ return
+ end
+
+ local rule_name = rule.symbol
+ local scope_name = rule.scope_name or rule_name
+
+ -- All regexps for this SA-style rule are registered in a dedicated scope
+ -- This allows clean removal and replacement when the map is reloaded
+
+ if words[1] == 'header' then
+ -- header SYMBOL Header =~ /regexp/flags
+ if #words >= 4 and (words[4] == '=~' or words[4] == '!~') then
+ local atom_name = words[2]
+ local header_name = words[3]
+ local re_expr = words_to_sa_re(words, 4)
+
+ -- Skip =~ or !~
+ re_expr = string.gsub(re_expr, '^[!=]~%s*', '')
+
+ local re = parse_sa_regexp(atom_name, re_expr)
+ if re then
+ -- Register regexp with cache in specific scope
+ rspamd_config:register_regexp_scoped(scope_name, {
+ re = re,
+ type = 'header',
+ header = header_name,
+ pcre_only = false,
+ })
+
+ re:set_limit(0) -- No limit
+ re:set_max_hits(1)
+
+ local negate = (words[4] == '!~')
+ sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'header', {
+ header = header_name,
+ strong = false,
+ negate = negate
+ })
+
+ -- Track atom state
+ regexp_rules_symbol_states[atom_name] = {
+ state = 'loading',
+ rule_name = rule_name,
+ type = 'atom'
+ }
+
+ lua_util.debugm(N, rspamd_config, 'added SA header atom: %s for header %s (scope: %s)',
+ atom_name, header_name, scope_name)
+ end
+ end
+ elseif words[1] == 'body' then
+ -- body SYMBOL /regexp/flags
+ if #words >= 3 then
+ local atom_name = words[2]
+ local re_expr = words_to_sa_re(words, 2)
+
+ local re = parse_sa_regexp(atom_name, re_expr)
+ if re then
+ rspamd_config:register_regexp_scoped(scope_name, {
+ re = re,
+ type = 'sabody',
+ pcre_only = false,
+ })
+
+ re:set_limit(0)
+ re:set_max_hits(1)
+
+ sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'body', {})
+
+ -- Track atom state
+ regexp_rules_symbol_states[atom_name] = {
+ state = 'loading',
+ rule_name = rule_name,
+ type = 'atom'
+ }
+
+ lua_util.debugm(N, rspamd_config, 'added SA body atom: %s (scope: %s)', atom_name, scope_name)
+ end
+ end
+ elseif words[1] == 'rawbody' then
+ -- rawbody SYMBOL /regexp/flags
+ if #words >= 3 then
+ local atom_name = words[2]
+ local re_expr = words_to_sa_re(words, 2)
+
+ local re = parse_sa_regexp(atom_name, re_expr)
+ if re then
+ rspamd_config:register_regexp_scoped(scope_name, {
+ re = re,
+ type = 'sarawbody',
+ pcre_only = false,
+ })
+
+ re:set_limit(0)
+ re:set_max_hits(1)
+
+ sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'rawbody', {})
+
+ -- Track atom state
+ regexp_rules_symbol_states[atom_name] = {
+ state = 'loading',
+ rule_name = rule_name,
+ type = 'atom'
+ }
+
+ lua_util.debugm(N, rspamd_config, 'added SA rawbody atom: %s (scope: %s)', atom_name, scope_name)
+ end
+ end
+ elseif words[1] == 'uri' then
+ -- uri SYMBOL /regexp/flags
+ if #words >= 3 then
+ local atom_name = words[2]
+ local re_expr = words_to_sa_re(words, 2)
+
+ local re = parse_sa_regexp(atom_name, re_expr)
+ if re then
+ rspamd_config:register_regexp_scoped(scope_name, {
+ re = re,
+ type = 'url',
+ pcre_only = false,
+ })
+
+ re:set_limit(0)
+ re:set_max_hits(1)
+
+ sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'uri', {})
+
+ -- Track atom state
+ regexp_rules_symbol_states[atom_name] = {
+ state = 'loading',
+ rule_name = rule_name,
+ type = 'atom'
+ }
+
+ lua_util.debugm(N, rspamd_config, 'added SA uri atom: %s (scope: %s)', atom_name, scope_name)
+ end
+ end
+ elseif words[1] == 'full' then
+ -- full SYMBOL /regexp/flags
+ if #words >= 3 then
+ local atom_name = words[2]
+ local re_expr = words_to_sa_re(words, 2)
+
+ local re = parse_sa_regexp(atom_name, re_expr)
+ if re then
+ rspamd_config:register_regexp_scoped(scope_name, {
+ re = re,
+ type = 'body',
+ pcre_only = false,
+ })
+
+ re:set_limit(0)
+ re:set_max_hits(1)
+
+ sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'full', {})
+
+ -- Track atom state
+ regexp_rules_symbol_states[atom_name] = {
+ state = 'loading',
+ rule_name = rule_name,
+ type = 'atom'
+ }
+
+ lua_util.debugm(N, rspamd_config, 'added SA full atom: %s (scope: %s)', atom_name, scope_name)
+ end
+ end
+ elseif words[1] == 'meta' then
+ -- meta SYMBOL expression
+ if #words >= 3 then
+ local meta_name = words[2]
+ local meta_expr = words_to_sa_re(words, 2)
+
+ sa_meta_rules[meta_name] = {
+ symbol = meta_name,
+ expression = meta_expr,
+ rule_name = rule_name
+ }
+
+ -- Track symbol state
+ regexp_rules_symbol_states[meta_name] = {
+ state = 'loading',
+ rule_name = rule_name,
+ type = 'meta'
+ }
+
+ lua_util.debugm(N, rspamd_config, 'added SA meta rule: %s = %s', meta_name, meta_expr)
+ end
+ elseif words[1] == 'score' then
+ -- score SYMBOL value
+ if #words >= 3 then
+ local score_symbol = words[2]
+ local score_value = tonumber(words[3])
+
+ if score_value then
+ sa_scores[score_symbol] = score_value
+ lua_util.debugm(N, rspamd_config, 'added SA score: %s = %s', score_symbol, score_value)
+ end
+ end
+ elseif words[1] == 'describe' then
+ -- describe SYMBOL description text
+ if #words >= 3 then
+ local desc_symbol = words[2]
+ local desc_text = words_to_sa_re(words, 2)
+
+ sa_descriptions[desc_symbol] = desc_text
+ lua_util.debugm(N, rspamd_config, 'added SA description: %s = %s', desc_symbol, desc_text)
+ end
+ end
+end
+
+local function parse_sa_atom(str)
+ local atom = table.concat(fun.totable(fun.take_while(function(c)
+ if string.find(', \t()><+!|&\n', c, 1, true) then
+ return false
+ end
+ return true
+ end, fun.iter(str))), '')
+
+ return atom
+end
+
+-- Forward declaration for mutual recursion
+local create_sa_meta_callback
+
+local function gen_sa_process_atom_cb(task, rule_name)
+ return function(atom)
+ -- Check symbol state first
+ local state_info = regexp_rules_symbol_states[atom]
+ if state_info then
+ if state_info.state == 'orphaned' or state_info.state == 'loading' then
+ -- Double-check by looking at scope loaded state
+ local scope_loaded = false
+ for _, rule in ipairs(rules) do
+ if rule.symbol == state_info.rule_name and rule.scope_name then
+ scope_loaded = rspamd_config:is_regexp_scope_loaded(rule.scope_name)
+ break
+ end
+ end
+
+ if scope_loaded and (state_info.type == 'atom' and sa_atoms[atom]) then
+ -- Update state to available if scope is loaded and atom exists
+ state_info.state = 'available'
+ lua_util.debugm(N, task, 'regexp_rules atom %s was %s, but scope is loaded - marking as available',
+ atom, state_info.state)
+ else
+ lua_util.debugm(N, task, 'regexp_rules atom %s is %s, returning 0', atom, state_info.state)
+ return 0
+ end
+ end
+ end
+
+ local atom_cb = sa_atoms[atom]
+
+ if atom_cb then
+ local res = atom_cb(task)
+
+ -- Return result without logging each atom
+ return res
+ else
+ -- Check if this is a SA meta rule
+ local meta_rule = sa_meta_rules[atom]
+ if meta_rule then
+ local meta_cb = create_sa_meta_callback(meta_rule)
+ local res = meta_cb(task)
+ return res or 0
+ end
+
+ -- External atom - check if task has this symbol
+ if task:has_symbol(atom) then
+ return 1
+ end
+ end
+ return 0
+ end
+end
+
+create_sa_meta_callback = function(meta_rule)
+ return function(task)
+ -- Check symbol state before execution
+ local state_info = regexp_rules_symbol_states[meta_rule.symbol]
+ if state_info then
+ if state_info.state == 'orphaned' or state_info.state == 'loading' then
+ -- Double-check by looking at scope loaded state
+ local scope_loaded = false
+ for _, rule in ipairs(rules) do
+ if rule.symbol == state_info.rule_name and rule.scope_name then
+ scope_loaded = rspamd_config:is_regexp_scope_loaded(rule.scope_name)
+ break
+ end
+ end
+
+ if scope_loaded and sa_meta_rules[meta_rule.symbol] then
+ -- Update state to available if scope is loaded and meta rule exists
+ state_info.state = 'available'
+ lua_util.debugm(N, task, 'regexp_rules meta %s was %s, but scope is loaded - marking as available',
+ meta_rule.symbol, state_info.state)
+ else
+ lua_util.debugm(N, task, 'regexp_rules meta %s is %s, skipping execution',
+ meta_rule.symbol, state_info.state)
+ return 0
+ end
+ end
+ end
+
+ local cached = task:cache_get('sa_multimap_metas_processed')
+
+ if not cached then
+ cached = {}
+ task:cache_set('sa_multimap_metas_processed', cached)
+ end
+
+ local function exclude_sym_filter(sopt)
+ -- Exclude self and atoms starting with __
+ return sopt ~= meta_rule.symbol
+ end
+
+ local already_processed = cached[meta_rule.symbol]
+
+ if not (already_processed and already_processed['default']) then
+ local expression = rspamd_expression.create(meta_rule.expression,
+ parse_sa_atom,
+ rspamd_config:get_mempool())
+ if not expression then
+ rspamd_logger.errx(rspamd_config, 'Cannot parse SA meta expression: %s', meta_rule.expression)
+ return
+ end
+
+ local function exec_symbol(cur_res)
+ local res, trace = expression:process_traced(gen_sa_process_atom_cb(task, meta_rule.rule_name))
+
+ if res > 0 then
+ local filtered_trace = fun.totable(fun.take_n(5,
+ fun.map(function(elt)
+ return elt:gsub('^__', '')
+ end, fun.filter(exclude_sym_filter, trace))))
+ lua_util.debugm(N, task, 'SA meta %s matched with result: %s; trace %s; filtered trace %s',
+ meta_rule.symbol, res, trace, filtered_trace)
+ task:insert_result_named(cur_res, meta_rule.symbol, 1.0, filtered_trace)
+ end
+
+ if not cached[meta_rule.symbol] then
+ cached[meta_rule.symbol] = {}
+ end
+ cached[meta_rule.symbol][cur_res] = res
+
+ return res
+ end
+
+ -- Invoke for all named results
+ local named_results = task:get_all_named_results()
+ for _, cur_res in ipairs(named_results) do
+ exec_symbol(cur_res)
+ end
+ else
+ -- We have cached the result
+ local res = already_processed['default'] or 0
+ lua_util.debugm(N, task, 'cached SA meta result for %s: %s', meta_rule.symbol, res)
+ end
+ end
+end
+
+-- Initialize SA meta rules after all atoms are processed
+local function finalize_sa_rules()
+ lua_util.debugm(N, rspamd_config, 'Finalizing SA rules - processing %s meta rules',
+ fun.length(sa_meta_rules))
+
+ for meta_name, meta_rule in pairs(sa_meta_rules) do
+ local score = sa_scores[meta_name] or 1.0
+ local description = sa_descriptions[meta_name] or ('multimap symbol ' .. meta_name)
+
+ lua_util.debugm(N, rspamd_config, 'Registering SA meta rule %s (score: %s, expression: %s)',
+ meta_name, score, meta_rule.expression)
+
+ local id = rspamd_config:register_symbol({
+ name = meta_name,
+ weight = score,
+ callback = create_sa_meta_callback(meta_rule),
+ type = 'normal',
+ flags = 'one_shot',
+ augmentations = {},
+ })
+
+ lua_util.debugm(N, rspamd_config, 'Successfully registered SA meta symbol %s with id %s (callback attached)',
+ meta_name, id)
+
+ rspamd_config:set_metric_symbol({
+ name = meta_name,
+ score = score,
+ description = description,
+ group = N,
+ })
+
+ -- Also register meta rule as an atom so it can be used in other meta expressions
+ sa_atoms[meta_name] = create_sa_meta_callback(meta_rule)
+
+ -- Mark symbol as available
+ if regexp_rules_symbol_states[meta_name] then
+ regexp_rules_symbol_states[meta_name].state = 'available'
+ else
+ regexp_rules_symbol_states[meta_name] = {
+ state = 'available',
+ rule_name = meta_rule.rule_name,
+ type = 'meta'
+ }
+ end
+
+ lua_util.debugm(N, rspamd_config, 'registered SA meta symbol: %s (score: %s)',
+ meta_name, score)
+ end
+
+ -- Mark orphaned symbols - only check meta symbols (not atoms) since atoms are just expression parts
+ for symbol, state_info in pairs(regexp_rules_symbol_states) do
+ if state_info.type == 'meta' and state_info.state == 'available' and not sa_meta_rules[symbol] then
+ state_info.state = 'orphaned'
+ state_info.orphaned_at = os.time()
+ lua_util.debugm(N, rspamd_config, 'marked regexp_rules symbol %s as orphaned', symbol)
+ end
+ end
+
+ lua_util.debugm(N, rspamd_config, 'SA rules finalization complete: registered %s meta rules with callbacks',
+ fun.length(sa_meta_rules))
+end
+
+-- Helper function to get regexp_rules symbol state statistics (only meta symbols, not atoms)
+local function get_regexp_rules_symbol_stats()
+ local stats = {
+ available = 0,
+ loading = 0,
+ orphaned = 0,
+ total = 0
+ }
+
+ for _, state_info in pairs(regexp_rules_symbol_states) do
+ if state_info.type == 'meta' then
+ stats[state_info.state] = (stats[state_info.state] or 0) + 1
+ stats.total = stats.total + 1
+ end
+ end
+
+ return stats
+end
+
+-- Helper function to synchronize symbol states with loaded scopes
+local function sync_regexp_rules_symbol_states()
+ lua_util.debugm(N, rspamd_config, 'Synchronizing regexp_rules symbol states with loaded scopes')
+
+ -- Check each rule to see if its scope is loaded
+ for _, rule in ipairs(rules) do
+ if rule.type == 'regexp_rules' and rule.scope_name then
+ local scope_loaded = rspamd_config:is_regexp_scope_loaded(rule.scope_name)
+
+ if scope_loaded then
+ -- Mark all meta symbols for this rule as available (atoms are just expression parts)
+ local updated_count = 0
+ for _, state_info in pairs(regexp_rules_symbol_states) do
+ if state_info.type == 'meta' and state_info.rule_name == rule.symbol and state_info.state ~= 'available' then
+ state_info.state = 'available'
+ updated_count = updated_count + 1
+ end
+ end
+
+ lua_util.debugm(N, rspamd_config, 'Scope %s is loaded, marked %s symbols as available',
+ rule.scope_name, updated_count)
+ else
+ lua_util.debugm(N, rspamd_config, 'Scope %s is not loaded', rule.scope_name)
+ end
+ end
+ end
+
+ local stats = get_regexp_rules_symbol_stats()
+ lua_util.debugm(N, rspamd_config, 'Symbol state stats after sync: available=%s, loading=%s, orphaned=%s, total=%s',
+ stats.available, stats.loading, stats.orphaned, stats.total)
+end
+
+-- Optional cleanup function to remove old orphaned symbols (can be called periodically)
+local function cleanup_orphaned_regexp_rules_symbols(max_age_seconds)
+ max_age_seconds = max_age_seconds or 3600 -- Default to 1 hour
+ local current_time = os.time()
+ local removed = 0
+
+ for symbol, state_info in pairs(regexp_rules_symbol_states) do
+ if state_info.type == 'meta' and state_info.state == 'orphaned' and state_info.orphaned_at then
+ if (current_time - state_info.orphaned_at) > max_age_seconds then
+ regexp_rules_symbol_states[symbol] = nil
+ -- Only meta rules should be cleaned up from sa_meta_rules
+ sa_meta_rules[symbol] = nil
+ removed = removed + 1
+ lua_util.debugm(N, rspamd_config, 'cleaned up orphaned regexp_rules symbol: %s', symbol)
+ end
+ end
+ end
+
+ if removed > 0 then
+ lua_util.debugm(N, rspamd_config, 'cleaned up %s orphaned regexp_rules symbols', removed)
+ end
+
+ return removed
+end
+
local value_types = {
ip = {
get_value = function(ip)
@@ -531,7 +1142,7 @@ local function multimap_query_redis(key, task, value, callback)
false, -- is write
redis_map_cb, --callback
cmd, -- command
- srch -- arguments
+ srch -- arguments
)
end
@@ -631,7 +1242,6 @@ local function multimap_callback(task, rule)
else
task:insert_result(forced, symbol, score, tostring(opt))
end
-
else
task:insert_result(forced, symbol, score)
end
@@ -671,7 +1281,6 @@ local function multimap_callback(task, rule)
local fn = multimap_filters[r.type]
if fn then
-
local filtered_value = fn(task, r.filter, value, r)
lua_util.debugm(N, task, 'apply filter %s for rule %s: %s -> %s',
r.filter, r.symbol, value, filtered_value)
@@ -1097,6 +1706,12 @@ local function multimap_callback(task, rule)
end
end
end,
+ regexp_rules = function()
+ -- For regexp_rules, the meta rules are registered as separate symbols
+ -- This is just a placeholder callback
+ lua_util.debugm(N, task, 'Regexp rules callback for %s - meta rules are registered as separate symbols',
+ rule.symbol)
+ end,
}
local rt = rule.type
@@ -1184,7 +1799,8 @@ local function add_multimap_rule(key, newrule)
country = true,
mempool = true,
selector = true,
- combined = true
+ combined = true,
+ regexp_rules = true
}
if newrule['message_func'] then
@@ -1275,6 +1891,145 @@ local function add_multimap_rule(key, newrule)
else
ret = true
end
+ elseif newrule.type == 'regexp_rules' then
+ -- SpamAssassin-like map processing using callback map with line-by-line processing
+ local map_ucl = newrule.map
+ if type(map_ucl) == 'string' then
+ -- Convert string URL to UCL format
+ map_ucl = {
+ url = map_ucl,
+ description = newrule.description
+ }
+ elseif type(map_ucl) == 'table' and not map_ucl.url and not map_ucl.urls then
+ rspamd_logger.errx(rspamd_config, 'SA map %s has no URL defined', newrule.symbol)
+ return nil
+ end
+
+ -- Set scope name for this regexp_rules map
+ local scope_name = newrule.symbol
+ newrule.scope_name = scope_name
+
+ -- Remove existing scope if it exists to ensure clean state
+ if rspamd_config:find_regexp_scope(scope_name) then
+ lua_util.debugm(N, rspamd_config, 'removing existing regexp scope: %s', scope_name)
+ rspamd_config:remove_regexp_scope(scope_name)
+ end
+
+ -- Mark the scope as unloaded during map processing
+ -- The scope will be created automatically when first regexp is added
+ local first_line_processed = false
+
+ -- Create callback map with by_line processing
+ newrule.map_obj = rspamd_config:add_map({
+ type = "callback",
+ url = map_ucl.url or map_ucl.urls or map_ucl,
+ description = newrule.description or 'SA-style multimap: ' .. newrule.symbol,
+ callback = function(pseudo_key, pseudo_value)
+ -- We have values being parsed as kv pairs, but they are not, so we concat them and use as a line
+ local line = pseudo_key .. ' ' .. pseudo_value
+ -- Add debug logging to see if callback is called
+ lua_util.debugm(N, rspamd_config, 'regexp_rules callback called for line: %s', line)
+
+ -- Mark scope as unloaded on first line
+ if not first_line_processed then
+ first_line_processed = true
+ lua_util.debugm(N, rspamd_config, 'processing first line of regexp_rules map %s', newrule.symbol)
+
+ -- Mark all existing symbols for this scope as loading
+ for symbol, state_info in pairs(regexp_rules_symbol_states) do
+ if state_info.rule_name == newrule.symbol then
+ state_info.state = 'loading'
+ lua_util.debugm(N, rspamd_config, 'marked regexp_rules symbol %s as loading for scope %s reload',
+ symbol, scope_name)
+ end
+ end
+
+ -- Clear atoms and meta rules for this scope
+ local symbols_to_remove = {}
+ for symbol, _ in pairs(sa_meta_rules) do
+ if regexp_rules_symbol_states[symbol] and regexp_rules_symbol_states[symbol].rule_name == newrule.symbol then
+ table.insert(symbols_to_remove, symbol)
+ end
+ end
+
+ for _, symbol in ipairs(symbols_to_remove) do
+ sa_atoms[symbol] = nil
+ sa_meta_rules[symbol] = nil
+ lua_util.debugm(N, rspamd_config, 'cleared regexp_rules symbol %s for scope %s reload',
+ symbol, scope_name)
+ end
+
+ -- The scope will be created by process_sa_line when first regexp is added
+ -- We mark it as unloaded immediately after creation
+ rspamd_config:set_regexp_scope_loaded(scope_name, false)
+ lua_util.debugm(N, rspamd_config, 'marked regexp scope %s as unloaded during processing', scope_name)
+ end
+ process_sa_line(newrule, line)
+ end,
+ by_line = true, -- Process line by line
+ opaque_data = false, -- Use plain strings
+ })
+
+ -- Add on_load callback to mark scope as loaded when map processing is complete
+ if newrule.map_obj then
+ newrule.map_obj:on_load(function()
+ lua_util.debugm(N, rspamd_config, 'regexp_rules map %s loaded successfully', newrule.symbol)
+
+ -- Mark all meta symbols for this scope as available (atoms are just expression parts)
+ for symbol, state_info in pairs(regexp_rules_symbol_states) do
+ if state_info.type == 'meta' and state_info.rule_name == newrule.symbol then
+ if state_info.state == 'loading' then
+ -- Check if this meta symbol still exists in the rules
+ if sa_meta_rules[symbol] then
+ state_info.state = 'available'
+ lua_util.debugm(N, rspamd_config, 'marked regexp_rules symbol %s as available after map load', symbol)
+ else
+ -- Symbol was removed in the new map
+ state_info.state = 'orphaned'
+ state_info.orphaned_at = os.time()
+ lua_util.debugm(N, rspamd_config, 'marked regexp_rules symbol %s as orphaned after map load', symbol)
+ end
+ end
+ end
+ end
+
+ -- Mark scope as loaded when map processing is complete
+ -- Check if scope exists (it might not if map was empty)
+ if rspamd_config:find_regexp_scope(scope_name) then
+ rspamd_config:set_regexp_scope_loaded(scope_name, true)
+ lua_util.debugm(N, rspamd_config, 'marked regexp scope %s as loaded after map processing', scope_name)
+
+ -- Trigger hyperscan compilation for this updated scope
+ newrule.map_obj:trigger_hyperscan_compilation()
+ lua_util.debugm(N, rspamd_config, 'triggered hyperscan compilation for scope %s after map loading',
+ scope_name)
+ else
+ lua_util.debugm(N, rspamd_config, 'regexp scope %s not created (empty map)', scope_name)
+ end
+
+ -- Synchronize symbol states after map load to ensure all processes see correct states
+ sync_regexp_rules_symbol_states()
+
+ -- Finalize SA rules immediately after map load
+ finalize_sa_rules()
+
+ -- Promote symcache resort after dynamic symbol registration
+ rspamd_config:promote_symbols_cache_resort()
+ lua_util.debugm(N, rspamd_config, 'promoted symcache resort after loading SA rules from map %s',
+ newrule.symbol)
+ end)
+ end
+
+ if newrule.map_obj then
+ -- Mark this rule as using SA functionality
+ newrule.uses_sa = true
+ lua_util.debugm(N, rspamd_config, 'created regexp_rules map %s with scope: %s',
+ newrule.symbol, scope_name)
+ ret = true
+ else
+ rspamd_logger.warnx(rspamd_config, 'Cannot add SA-style rule: map doesn\'t exists: %s',
+ newrule['map'])
+ end
else
if newrule['type'] == 'ip' then
newrule.map_obj = lua_maps.map_add_from_ucl(newrule.map, 'radix',
@@ -1282,7 +2037,7 @@ local function add_multimap_rule(key, newrule)
if newrule.map_obj then
ret = true
else
- rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %1',
+ rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %s',
newrule['map'])
end
elseif newrule['type'] == 'received' then
@@ -1303,7 +2058,7 @@ local function add_multimap_rule(key, newrule)
if newrule.map_obj then
ret = true
else
- rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %1',
+ rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %s',
newrule['map'])
end
else
@@ -1312,12 +2067,11 @@ local function add_multimap_rule(key, newrule)
if newrule.map_obj then
ret = true
else
- rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %1',
+ rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %s',
newrule['map'])
end
end
elseif known_generic_types[newrule.type] then
-
if newrule.filter == 'ip_addr' then
newrule.map_obj = lua_maps.map_add_from_ucl(newrule.map, 'radix',
newrule.description)
@@ -1328,11 +2082,14 @@ local function add_multimap_rule(key, newrule)
if newrule.map_obj then
ret = true
else
- rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %1',
+ rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %s',
newrule['map'])
end
elseif newrule['type'] == 'dnsbl' then
ret = true
+ else
+ rspamd_logger.errx(rspamd_config, 'cannot add rule %s: invalid type %s',
+ key, newrule['type'])
end
end
@@ -1390,6 +2147,29 @@ end
local opts = rspamd_config:get_all_opt(N)
if opts and type(opts) == 'table' then
redis_params = rspamd_parse_redis_server(N)
+
+ -- Initialize regexp_rules symbol states from existing sa_atoms and sa_meta_rules
+ -- This helps with module reload scenarios
+ for atom_name, _ in pairs(sa_atoms) do
+ if not regexp_rules_symbol_states[atom_name] then
+ regexp_rules_symbol_states[atom_name] = {
+ state = 'available',
+ rule_name = 'unknown',
+ type = 'atom'
+ }
+ end
+ end
+
+ for meta_name, meta_rule in pairs(sa_meta_rules) do
+ if not regexp_rules_symbol_states[meta_name] then
+ regexp_rules_symbol_states[meta_name] = {
+ state = 'available',
+ rule_name = meta_rule.rule_name or 'unknown',
+ type = 'meta'
+ }
+ end
+ end
+
for k, m in pairs(opts) do
if type(m) == 'table' and m['type'] then
local rule = add_multimap_rule(k, m)
@@ -1462,5 +2242,29 @@ if opts and type(opts) == 'table' then
if #rules == 0 then
lua_util.disable_module(N, "config")
+ else
+ -- Finalize SpamAssassin-like rules after all maps are processed
+ local has_sa_rules = false
+ for _, rule in ipairs(rules) do
+ if rule.uses_sa then
+ has_sa_rules = true
+ break
+ end
+ end
+
+ if has_sa_rules then
+ -- Add a callback to synchronize symbol states in worker processes
+ rspamd_config:add_on_load(function(cfg, ev_base, worker)
+ -- Synchronize symbol states with loaded scopes in worker processes
+ if worker then
+ sync_regexp_rules_symbol_states()
+ end
+ end)
+
+ -- Export utility functions for debugging/monitoring
+ rspamd_plugins.multimap = rspamd_plugins.multimap or {}
+ rspamd_plugins.multimap.get_regexp_rules_symbol_stats = get_regexp_rules_symbol_stats
+ rspamd_plugins.multimap.cleanup_orphaned_regexp_rules_symbols = cleanup_orphaned_regexp_rules_symbols
+ end
end
end
diff --git a/src/plugins/lua/ratelimit.lua b/src/plugins/lua/ratelimit.lua
index c20e61b17..d463658fa 100644
--- a/src/plugins/lua/ratelimit.lua
+++ b/src/plugins/lua/ratelimit.lua
@@ -373,7 +373,7 @@ local function ratelimit_cb(task)
local function gen_check_cb(prefix, bucket, lim_name, lim_key)
return function(err, data)
if err then
- rspamd_logger.errx('cannot check limit %s: %s %s', prefix, err, data)
+ rspamd_logger.errx('cannot check limit %s: %s', prefix, err)
elseif type(data) == 'table' and data[1] then
lua_util.debugm(N, task,
"got reply for limit %s (%s / %s); %s burst, %s:%s dyn, %s leaked",
@@ -416,7 +416,7 @@ local function ratelimit_cb(task)
task:set_pre_result('soft reject',
message_func(task, lim_name, prefix, bucket, lim_key), N)
else
- task:set_pre_result('soft reject', bucket.message)
+ task:set_pre_result('soft reject', bucket.message, N)
end
end
end
@@ -476,7 +476,7 @@ local function maybe_cleanup_pending(task)
local bucket = v.bucket
local function cleanup_cb(err, data)
if err then
- rspamd_logger.errx('cannot cleanup limit %s: %s %s', k, err, data)
+ rspamd_logger.errx('cannot cleanup limit %s: %s', k, err)
else
lua_util.debugm(N, task, 'cleaned pending bucked for %s: %s', k, data)
end
diff --git a/src/plugins/lua/rbl.lua b/src/plugins/lua/rbl.lua
index af4a4cd15..b5b904b00 100644
--- a/src/plugins/lua/rbl.lua
+++ b/src/plugins/lua/rbl.lua
@@ -1077,7 +1077,7 @@ local function add_rbl(key, rbl, global_opts)
rbl.selector_flatten)
if not sel then
- rspamd_logger.errx('invalid selector for rbl rule %s: %s', key, selector)
+ rspamd_logger.errx(rspamd_config, 'invalid selector for rbl rule %s: %s', key, selector)
return false
end
diff --git a/src/plugins/lua/replies.lua b/src/plugins/lua/replies.lua
index 08fb68bc7..2f0153d00 100644
--- a/src/plugins/lua/replies.lua
+++ b/src/plugins/lua/replies.lua
@@ -79,8 +79,8 @@ local function configure_redis_scripts(_, _)
end
]]
local set_script_zadd_global = lua_util.jinja_template(redis_script_zadd_global,
- { max_global_size = settings.max_global_size })
- global_replies_set_script = lua_redis.add_redis_script(set_script_zadd_global, redis_params)
+ { max_global_size = settings.max_global_size })
+ global_replies_set_script = lua_redis.add_redis_script(set_script_zadd_global, redis_params)
local redis_script_zadd_local = [[
redis.call('ZREMRANGEBYRANK', KEYS[1], 0, -({= max_local_size =} + 1)) -- keeping size of local replies set
@@ -102,7 +102,7 @@ local function configure_redis_scripts(_, _)
end
]]
local set_script_zadd_local = lua_util.jinja_template(redis_script_zadd_local,
- { expire_time = settings.expire, max_local_size = settings.max_local_size })
+ { expire_time = settings.expire, max_local_size = settings.max_local_size })
local_replies_set_script = lua_redis.add_redis_script(set_script_zadd_local, redis_params)
end
@@ -110,7 +110,7 @@ local function replies_check(task)
local in_reply_to
local function check_recipient(stored_rcpt)
- local rcpts = task:get_recipients('mime')
+ local rcpts = task:get_recipients('smtp')
lua_util.debugm(N, task, 'recipients: %s', rcpts)
if rcpts then
local filter_predicate = function(input_rcpt)
@@ -119,7 +119,7 @@ local function replies_check(task)
return real_rcpt_h == stored_rcpt
end
- if fun.any(filter_predicate, fun.map(function(rcpt)
+ if fun.all(filter_predicate, fun.map(function(rcpt)
return rcpt.addr or ''
end, rcpts)) then
lua_util.debugm(N, task, 'reply to %s validated', in_reply_to)
@@ -155,9 +155,9 @@ local function replies_check(task)
end
lua_redis.exec_redis_script(global_replies_set_script,
- { task = task, is_write = true },
- zadd_global_set_cb,
- { global_key }, params)
+ { task = task, is_write = true },
+ zadd_global_set_cb,
+ { global_key }, params)
end
local function add_to_replies_set(recipients)
@@ -173,7 +173,7 @@ local function replies_check(task)
local params = recipients
lua_util.debugm(N, task,
- 'Adding recipients %s to sender %s local replies set', recipients, sender_key)
+ 'Adding recipients %s to sender %s local replies set', recipients, sender_key)
local function zadd_cb(err, _)
if err ~= nil then
@@ -189,9 +189,9 @@ local function replies_check(task)
table.insert(params, 1, task_time_str)
lua_redis.exec_redis_script(local_replies_set_script,
- { task = task, is_write = true },
- zadd_cb,
- { sender_key }, params)
+ { task = task, is_write = true },
+ zadd_cb,
+ { sender_key }, params)
end
local function redis_get_cb(err, data, addr)
@@ -387,7 +387,7 @@ if opts then
end
lua_redis.register_prefix(settings.sender_prefix, N,
- 'Prefix to identify replies sets')
+ 'Prefix to identify replies sets')
local id = rspamd_config:register_symbol({
name = 'REPLIES_CHECK',
diff --git a/src/plugins/lua/reputation.lua b/src/plugins/lua/reputation.lua
index bd7d91932..eacaee064 100644
--- a/src/plugins/lua/reputation.lua
+++ b/src/plugins/lua/reputation.lua
@@ -200,7 +200,9 @@ local function dkim_reputation_filter(task, rule)
end
end
- if sel_tld and requests[sel_tld] then
+ if rule.selector.config.exclusion_map and sel_tld and rule.selector.config.exclusion_map:get_key(sel_tld) then
+ lua_util.debugm(N, task, 'DKIM domain %s is excluded from reputation scoring', sel_tld)
+ elseif sel_tld and requests[sel_tld] then
if requests[sel_tld] == 'a' then
rep_accepted = rep_accepted + generic_reputation_calc(v, rule, 1.0, task)
end
@@ -243,9 +245,13 @@ local function dkim_reputation_idempotent(task, rule)
if sc then
for dom, res in pairs(requests) do
- -- tld + "." + check_result, e.g. example.com.+ - reputation for valid sigs
- local query = string.format('%s.%s', dom, res)
- rule.backend.set_token(task, rule, nil, query, sc)
+ if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(dom) then
+ lua_util.debugm(N, task, 'DKIM domain %s is excluded from reputation update', dom)
+ else
+ -- tld + "." + check_result, e.g. example.com.+ - reputation for valid sigs
+ local query = string.format('%s.%s', dom, res)
+ rule.backend.set_token(task, rule, nil, query, sc)
+ end
end
end
end
@@ -277,6 +283,7 @@ local dkim_selector = {
outbound = true,
inbound = true,
max_accept_adjustment = 2.0, -- How to adjust accepted DKIM score
+ exclusion_map = nil
},
dependencies = { "DKIM_TRACE" },
filter = dkim_reputation_filter, -- used to get scores
@@ -356,10 +363,14 @@ local function url_reputation_filter(task, rule)
for i, res in pairs(results) do
local req = requests[i]
if req then
- local url_score = generic_reputation_calc(res, rule,
- req[2] / mhits, task)
- lua_util.debugm(N, task, "score for url %s is %s, score=%s", req[1], url_score, score)
- score = score + url_score
+ if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(req[1]) then
+ lua_util.debugm(N, task, 'URL domain %s is excluded from reputation scoring', req[1])
+ else
+ local url_score = generic_reputation_calc(res, rule,
+ req[2] / mhits, task)
+ lua_util.debugm(N, task, "score for url %s is %s, score=%s", req[1], url_score, score)
+ score = score + url_score
+ end
end
end
@@ -386,7 +397,11 @@ local function url_reputation_idempotent(task, rule)
if sc then
for _, tld in ipairs(requests) do
- rule.backend.set_token(task, rule, nil, tld[1], sc)
+ if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(tld[1]) then
+ lua_util.debugm(N, task, 'URL domain %s is excluded from reputation update', tld[1])
+ else
+ rule.backend.set_token(task, rule, nil, tld[1], sc)
+ end
end
end
end
@@ -401,6 +416,7 @@ local url_selector = {
check_from = true,
outbound = true,
inbound = true,
+ exclusion_map = nil
},
filter = url_reputation_filter, -- used to get scores
idempotent = url_reputation_idempotent -- used to set scores
@@ -439,6 +455,11 @@ local function ip_reputation_filter(task, rule)
ip = ip:apply_mask(cfg.ipv6_mask)
end
+ if cfg.exclusion_map and cfg.exclusion_map:get_key(ip) then
+ lua_util.debugm(N, task, 'IP %s is excluded from reputation scoring', tostring(ip))
+ return
+ end
+
local pool = task:get_mempool()
local asn = pool:get_variable("asn")
local country = pool:get_variable("country")
@@ -554,6 +575,11 @@ local function ip_reputation_idempotent(task, rule)
ip = ip:apply_mask(cfg.ipv6_mask)
end
+ if cfg.exclusion_map and cfg.exclusion_map:get_key(ip) then
+ lua_util.debugm(N, task, 'IP %s is excluded from reputation update', tostring(ip))
+ return
+ end
+
local pool = task:get_mempool()
local asn = pool:get_variable("asn")
local country = pool:get_variable("country")
@@ -600,6 +626,7 @@ local ip_selector = {
inbound = true,
ipv4_mask = 32, -- Mask bits for ipv4
ipv6_mask = 64, -- Mask bits for ipv6
+ exclusion_map = nil
},
--dependencies = {"ASN"}, -- ASN is a prefilter now...
init = ip_reputation_init,
@@ -621,6 +648,11 @@ local function spf_reputation_filter(task, rule)
local cr = require "rspamd_cryptobox_hash"
local hkey = cr.create(spf_record):base32():sub(1, 32)
+ if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(hkey) then
+ lua_util.debugm(N, task, 'SPF record %s is excluded from reputation scoring', hkey)
+ return
+ end
+
lua_util.debugm(N, task, 'check spf record %s -> %s', spf_record, hkey)
local function tokens_cb(err, token, values)
@@ -649,6 +681,11 @@ local function spf_reputation_idempotent(task, rule)
local cr = require "rspamd_cryptobox_hash"
local hkey = cr.create(spf_record):base32():sub(1, 32)
+ if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(hkey) then
+ lua_util.debugm(N, task, 'SPF record %s is excluded from reputation update', hkey)
+ return
+ end
+
lua_util.debugm(N, task, 'set spf record %s -> %s = %s',
spf_record, hkey, sc)
rule.backend.set_token(task, rule, nil, hkey, sc)
@@ -663,6 +700,7 @@ local spf_selector = {
max_score = nil,
outbound = true,
inbound = true,
+ exclusion_map = nil
},
dependencies = { "R_SPF_ALLOW" },
filter = spf_reputation_filter, -- used to get scores
@@ -697,6 +735,13 @@ local function generic_reputation_init(rule)
'Whitelisted selectors')
end
+ if cfg.exclusion_map then
+ cfg.exclusion_map = lua_maps.map_add('reputation',
+ 'generic_exclusion',
+ 'set',
+ 'Excluded selectors')
+ end
+
return true
end
@@ -706,6 +751,10 @@ local function generic_reputation_filter(task, rule)
local function tokens_cb(err, token, values)
if values then
+ if cfg.exclusion_map and cfg.exclusion_map:get_key(token) then
+ lua_util.debugm(N, task, 'Generic selector token %s is excluded from reputation scoring', token)
+ return
+ end
local score = generic_reputation_calc(values, rule, 1.0, task)
if math.abs(score) > 1e-3 then
@@ -742,14 +791,22 @@ local function generic_reputation_idempotent(task, rule)
if sc then
if type(selector_res) == 'table' then
fun.each(function(e)
- lua_util.debugm(N, task, 'set generic selector (%s) %s = %s',
- rule['symbol'], e, sc)
- rule.backend.set_token(task, rule, nil, e, sc)
+ if cfg.exclusion_map and cfg.exclusion_map:get_key(e) then
+ lua_util.debugm(N, task, 'Generic selector token %s is excluded from reputation update', e)
+ else
+ lua_util.debugm(N, task, 'set generic selector (%s) %s = %s',
+ rule['symbol'], e, sc)
+ rule.backend.set_token(task, rule, nil, e, sc)
+ end
end, selector_res)
else
- lua_util.debugm(N, task, 'set generic selector (%s) %s = %s',
- rule['symbol'], selector_res, sc)
- rule.backend.set_token(task, rule, nil, selector_res, sc)
+ if cfg.exclusion_map and cfg.exclusion_map:get_key(selector_res) then
+ lua_util.debugm(N, task, 'Generic selector token %s is excluded from reputation update', selector_res)
+ else
+ lua_util.debugm(N, task, 'set generic selector (%s) %s = %s',
+ rule['symbol'], selector_res, sc)
+ rule.backend.set_token(task, rule, nil, selector_res, sc)
+ end
end
end
end
@@ -764,6 +821,7 @@ local generic_selector = {
selector = ts.string,
delimiter = ts.string,
whitelist = ts.one_of(lua_maps.map_schema, lua_maps_exprs.schema):is_optional(),
+ exclusion_map = ts.one_of(lua_maps.map_schema, lua_maps_exprs.schema):is_optional()
},
config = {
lower_bound = 10, -- minimum number of messages to be scored
@@ -773,7 +831,8 @@ local generic_selector = {
inbound = true,
selector = nil,
delimiter = ':',
- whitelist = nil
+ whitelist = nil,
+ exclusion_map = nil
},
init = generic_reputation_init,
filter = generic_reputation_filter, -- used to get scores
@@ -1107,7 +1166,7 @@ local backends = {
name = '1m',
mult = 1.0,
}
- }, -- What buckets should be used, default 1h and 1month
+ }, -- What buckets should be used, default 1month
},
init = reputation_redis_init,
get_token = reputation_redis_get_token,
@@ -1267,6 +1326,24 @@ local function parse_rule(name, tbl)
end
end
+ -- Parse exclusion_map for reputation exclusion lists
+ if rule.config.exclusion_map then
+ local map_type = 'set' -- Default to set for string-based selectors (dkim, url, spf, generic)
+ if sel_type == 'ip' or sel_type == 'sender' then
+ map_type = 'radix' -- Use radix for IP-based selectors
+ end
+ local map = lua_maps.map_add_from_ucl(rule.config.exclusion_map,
+ map_type,
+ sel_type .. ' reputation exclusion map')
+ if not map then
+ rspamd_logger.errx(rspamd_config, "cannot parse exclusion map config for %s: (%s)",
+ sel_type,
+ rule.config.exclusion_map)
+ return false
+ end
+ rule.config.exclusion_map = map
+ end
+
local symbol = rule.selector.config.symbol or name
if tbl.symbol then
symbol = tbl.symbol
@@ -1387,4 +1464,4 @@ if opts['rules'] then
end
else
lua_util.disable_module(N, "config")
-end
+end \ No newline at end of file
diff --git a/src/plugins/lua/settings.lua b/src/plugins/lua/settings.lua
index 0f8e00723..c576e1325 100644
--- a/src/plugins/lua/settings.lua
+++ b/src/plugins/lua/settings.lua
@@ -1275,7 +1275,7 @@ local function gen_redis_callback(handler, id)
ucl_err)
else
local obj = parser:get_object()
- rspamd_logger.infox(task, "<%1> apply settings according to redis rule %2",
+ rspamd_logger.infox(task, "<%s> apply settings according to redis rule %s",
task:get_message_id(), id)
apply_settings(task, obj, nil, 'redis')
break
@@ -1283,7 +1283,7 @@ local function gen_redis_callback(handler, id)
end
end
elseif err then
- rspamd_logger.errx(task, 'Redis error: %1', err)
+ rspamd_logger.errx(task, 'Redis error: %s', err)
end
end
@@ -1371,7 +1371,7 @@ if set_section and set_section[1] and type(set_section[1]) == "string" then
opaque_data = true
}
if not rspamd_config:add_map(map_attrs) then
- rspamd_logger.errx(rspamd_config, 'cannot load settings from %1', set_section)
+ rspamd_logger.errx(rspamd_config, 'cannot load settings from %s', set_section)
end
elseif set_section and type(set_section) == "table" then
settings_map_pool = rspamd_mempool.create()
diff --git a/src/plugins/lua/spamassassin.lua b/src/plugins/lua/spamassassin.lua
index 3ea794495..c03481de2 100644
--- a/src/plugins/lua/spamassassin.lua
+++ b/src/plugins/lua/spamassassin.lua
@@ -221,7 +221,7 @@ local function handle_header_def(hline, cur_rule)
})
cur_rule['function'] = function(task)
if not re then
- rspamd_logger.errx(task, 're is missing for rule %1', h)
+ rspamd_logger.errx(task, 're is missing for rule %s', h)
return 0
end
@@ -272,7 +272,7 @@ local function handle_header_def(hline, cur_rule)
elseif func == 'case' then
cur_param['strong'] = true
else
- rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
+ rspamd_logger.warnx(rspamd_config, 'Function %s is not supported in %s',
func, cur_rule['symbol'])
end
end, fun.tail(args))
@@ -314,7 +314,7 @@ end
local function freemail_search(input)
local res = 0
local function trie_callback(number, pos)
- lua_util.debugm(N, rspamd_config, 'Matched pattern %1 at pos %2', freemail_domains[number], pos)
+ lua_util.debugm(N, rspamd_config, 'Matched pattern %s at pos %s', freemail_domains[number], pos)
res = res + 1
end
@@ -369,7 +369,7 @@ local function gen_eval_rule(arg)
end
return 0
else
- rspamd_logger.infox(rspamd_config, 'cannot create regexp %1', re)
+ rspamd_logger.infox(rspamd_config, 'cannot create regexp %s', re)
return 0
end
end
@@ -461,7 +461,7 @@ local function gen_eval_rule(arg)
end
end
else
- rspamd_logger.infox(task, 'unimplemented mime check %1', arg)
+ rspamd_logger.infox(task, 'unimplemented mime check %s', arg)
end
end
@@ -576,7 +576,7 @@ local function maybe_parse_sa_function(line)
local elts = split(line, '[^:]+')
arg = elts[2]
- lua_util.debugm(N, rspamd_config, 'trying to parse SA function %1 with args %2',
+ lua_util.debugm(N, rspamd_config, 'trying to parse SA function %s with args %s',
elts[1], elts[2])
local substitutions = {
{ '^exists:',
@@ -612,7 +612,7 @@ local function maybe_parse_sa_function(line)
end
if not func then
- rspamd_logger.errx(task, 'cannot find appropriate eval rule for function %1',
+ rspamd_logger.errx(task, 'cannot find appropriate eval rule for function %s',
arg)
else
return func(task)
@@ -685,7 +685,7 @@ local function process_sa_conf(f)
end
-- We have previous rule valid
if not cur_rule['symbol'] then
- rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', cur_rule)
+ rspamd_logger.errx(rspamd_config, 'bad rule definition: %s', cur_rule)
end
rules[cur_rule['symbol']] = cur_rule
cur_rule = {}
@@ -695,15 +695,15 @@ local function process_sa_conf(f)
local function parse_score(words)
if #words == 3 then
-- score rule <x>
- lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[3])
+ lua_util.debugm(N, rspamd_config, 'found score for %s: %s', words[2], words[3])
return tonumber(words[3])
elseif #words == 6 then
-- score rule <x1> <x2> <x3> <x4>
-- we assume here that bayes and network are enabled and select <x4>
- lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[6])
+ lua_util.debugm(N, rspamd_config, 'found score for %s: %s', words[2], words[6])
return tonumber(words[6])
else
- rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
+ rspamd_logger.errx(rspamd_config, 'invalid score for %s', words[2])
end
return 0
@@ -812,7 +812,7 @@ local function process_sa_conf(f)
cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
if not cur_rule['re'] then
- rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
+ rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%s' for %s",
cur_rule['re_expr'], cur_rule['symbol'])
else
cur_rule['re']:set_max_hits(1)
@@ -829,8 +829,8 @@ local function process_sa_conf(f)
cur_rule['mime'] = false
end
- if cur_rule['re'] and cur_rule['symbol'] and
- (cur_rule['header'] or cur_rule['function']) then
+ if cur_rule['re'] and cur_rule['symbol']
+ and (cur_rule['header'] or cur_rule['function']) then
valid_rule = true
cur_rule['re']:set_max_hits(1)
if cur_rule['header'] and cur_rule['ordinary'] then
@@ -894,7 +894,7 @@ local function process_sa_conf(f)
cur_rule['function'] = func
valid_rule = true
else
- rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
+ rspamd_logger.infox(rspamd_config, 'unknown function %s', args)
end
end
elseif words[1] == "body" then
@@ -931,7 +931,7 @@ local function process_sa_conf(f)
cur_rule['function'] = func
valid_rule = true
else
- rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
+ rspamd_logger.infox(rspamd_config, 'unknown function %s', args)
end
end
elseif words[1] == "rawbody" then
@@ -968,7 +968,7 @@ local function process_sa_conf(f)
cur_rule['function'] = func
valid_rule = true
else
- rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
+ rspamd_logger.infox(rspamd_config, 'unknown function %s', args)
end
end
elseif words[1] == "full" then
@@ -1006,7 +1006,7 @@ local function process_sa_conf(f)
cur_rule['function'] = func
valid_rule = true
else
- rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
+ rspamd_logger.infox(rspamd_config, 'unknown function %s', args)
end
end
elseif words[1] == "uri" then
@@ -1265,11 +1265,11 @@ local function post_process()
if res then
local nre = rspamd_regexp.create(nexpr)
if not nre then
- rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %1', r)
+ rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %s', r)
--rule['re'] = nil
else
local old_max_hits = rule['re']:get_max_hits()
- lua_util.debugm(N, rspamd_config, 'replace %1 -> %2', r, nexpr)
+ lua_util.debugm(N, rspamd_config, 'replace %s -> %s', r, nexpr)
rspamd_config:replace_regexp({
old_re = rule['re'],
new_re = nre,
@@ -1306,8 +1306,7 @@ local function post_process()
end
if not r['re'] then
- rspamd_logger.errx(task, 're is missing for rule %1 (%2 header)', k,
- h['header'])
+ rspamd_logger.errx(task, 're is missing for rule %s', h)
return 0
end
@@ -1434,7 +1433,7 @@ local function post_process()
fun.each(function(k, r)
local f = function(task)
if not r['re'] then
- rspamd_logger.errx(task, 're is missing for rule %1', k)
+ rspamd_logger.errx(task, 're is missing for rule %s', k)
return 0
end
@@ -1461,7 +1460,7 @@ local function post_process()
fun.each(function(k, r)
local f = function(task)
if not r['re'] then
- rspamd_logger.errx(task, 're is missing for rule %1', k)
+ rspamd_logger.errx(task, 're is missing for rule %s', k)
return 0
end
@@ -1486,7 +1485,7 @@ local function post_process()
fun.each(function(k, r)
local f = function(task)
if not r['re'] then
- rspamd_logger.errx(task, 're is missing for rule %1', k)
+ rspamd_logger.errx(task, 're is missing for rule %s', k)
return 0
end
@@ -1629,8 +1628,8 @@ local function post_process()
rspamd_config:register_dependency(k, rspamd_symbol)
external_deps[k][rspamd_symbol] = true
lua_util.debugm(N, rspamd_config,
- 'atom %1 is a direct foreign dependency, ' ..
- 'register dependency for %2 on %3',
+ 'atom %s is a direct foreign dependency, ' ..
+ 'register dependency for %s on %s',
a, k, rspamd_symbol)
end
end
@@ -1659,8 +1658,8 @@ local function post_process()
rspamd_config:register_dependency(k, dep)
external_deps[k][dep] = true
lua_util.debugm(N, rspamd_config,
- 'atom %1 is an indirect foreign dependency, ' ..
- 'register dependency for %2 on %3',
+ 'atom %s is an indirect foreign dependency, ' ..
+ 'register dependency for %s on %s',
a, k, dep)
nchanges = nchanges + 1
end
@@ -1694,10 +1693,10 @@ local function post_process()
-- Logging output
if freemail_domains then
freemail_trie = rspamd_trie.create(freemail_domains)
- rspamd_logger.infox(rspamd_config, 'loaded %1 freemail domains definitions',
+ rspamd_logger.infox(rspamd_config, 'loaded %s freemail domains definitions',
#freemail_domains)
end
- rspamd_logger.infox(rspamd_config, 'loaded %1 blacklist/whitelist elements',
+ rspamd_logger.infox(rspamd_config, 'loaded %s blacklist/whitelist elements',
sa_lists['elts'])
end
@@ -1739,7 +1738,7 @@ if type(section) == "table" then
process_sa_conf(f)
has_rules = true
else
- rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
+ rspamd_logger.errx(rspamd_config, "cannot open %s", matched)
end
end
end
@@ -1758,7 +1757,7 @@ if type(section) == "table" then
process_sa_conf(f)
has_rules = true
else
- rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
+ rspamd_logger.errx(rspamd_config, "cannot open %s", matched)
end
end
end
diff --git a/src/plugins/lua/trie.lua b/src/plugins/lua/trie.lua
index 7ba455289..7c7214b55 100644
--- a/src/plugins/lua/trie.lua
+++ b/src/plugins/lua/trie.lua
@@ -107,10 +107,10 @@ local function process_trie_file(symbol, cf)
local file = io.open(cf['file'])
if not file then
- rspamd_logger.errx(rspamd_config, 'Cannot open trie file %1', cf['file'])
+ rspamd_logger.errx(rspamd_config, 'Cannot open trie file %s', cf['file'])
else
if cf['binary'] then
- rspamd_logger.errx(rspamd_config, 'binary trie patterns are not implemented yet: %1',
+ rspamd_logger.errx(rspamd_config, 'binary trie patterns are not implemented yet: %s',
cf['file'])
else
for line in file:lines() do
@@ -123,7 +123,7 @@ end
local function process_trie_conf(symbol, cf)
if type(cf) ~= 'table' then
- rspamd_logger.errx(rspamd_config, 'invalid value for symbol %1: "%2", expected table',
+ rspamd_logger.errx(rspamd_config, 'invalid value for symbol %s: "%s", expected table',
symbol, cf)
return
end
@@ -145,17 +145,17 @@ if opts then
if #raw_patterns > 0 then
raw_trie = rspamd_trie.create(raw_patterns)
- rspamd_logger.infox(rspamd_config, 'registered raw search trie from %1 patterns', #raw_patterns)
+ rspamd_logger.infox(rspamd_config, 'registered raw search trie from %s patterns', #raw_patterns)
end
if #mime_patterns > 0 then
mime_trie = rspamd_trie.create(mime_patterns)
- rspamd_logger.infox(rspamd_config, 'registered mime search trie from %1 patterns', #mime_patterns)
+ rspamd_logger.infox(rspamd_config, 'registered mime search trie from %s patterns', #mime_patterns)
end
if #body_patterns > 0 then
body_trie = rspamd_trie.create(body_patterns)
- rspamd_logger.infox(rspamd_config, 'registered body search trie from %1 patterns', #body_patterns)
+ rspamd_logger.infox(rspamd_config, 'registered body search trie from %s patterns', #body_patterns)
end
local id = -1
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 414e1b3a6..df704b8ed 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -15,6 +15,10 @@
*/
/***MODULE:regexp
* rspamd module that implements different regexp rules
+ *
+ * For object-based configuration, you can specify:
+ * - `expression_flags`: array of strings or single string with expression flags
+ * - `"noopt"`: disable expression optimizations (useful for some SpamAssassin rules)
*/
@@ -32,6 +36,7 @@ struct regexp_module_item {
struct rspamd_expression *expr;
const char *symbol;
struct ucl_lua_funcdata *lua_function;
+ int expression_flags;
};
struct regexp_ctx {
@@ -68,12 +73,53 @@ regexp_get_context(struct rspamd_config *cfg)
}
/* Process regexp expression */
+static int
+parse_expression_flags(const ucl_object_t *flags_obj)
+{
+ int flags = 0;
+ const ucl_object_t *cur;
+ ucl_object_iter_t it = NULL;
+ const char *flag_name;
+
+ if (!flags_obj) {
+ return 0;
+ }
+
+ if (ucl_object_type(flags_obj) == UCL_ARRAY) {
+ /* Array of flag names */
+ while ((cur = ucl_object_iterate(flags_obj, &it, true)) != NULL) {
+ if (ucl_object_type(cur) == UCL_STRING) {
+ flag_name = ucl_object_tostring(cur);
+ if (strcmp(flag_name, "noopt") == 0) {
+ flags |= RSPAMD_EXPRESSION_FLAG_NOOPT;
+ }
+ else {
+ msg_warn("unknown expression flag: %s", flag_name);
+ }
+ }
+ }
+ }
+ else if (ucl_object_type(flags_obj) == UCL_STRING) {
+ /* Single flag name */
+ flag_name = ucl_object_tostring(flags_obj);
+ if (strcmp(flag_name, "noopt") == 0) {
+ flags |= RSPAMD_EXPRESSION_FLAG_NOOPT;
+ }
+ else {
+ msg_warn("unknown expression flag: %s", flag_name);
+ }
+ }
+
+ return flags;
+}
+
static gboolean
read_regexp_expression(rspamd_mempool_t *pool,
struct regexp_module_item *chain,
const char *symbol,
const char *line,
- struct rspamd_mime_expr_ud *ud)
+ struct rspamd_mime_expr_ud *ud,
+ int expression_flags)
{
struct rspamd_expression *e = NULL;
GError *err = NULL;
@@ -90,6 +136,7 @@ read_regexp_expression(rspamd_mempool_t *pool,
g_assert(e != NULL);
chain->expr = e;
+ chain->expression_flags = expression_flags;
return TRUE;
}
@@ -165,13 +212,14 @@ int regexp_module_config(struct rspamd_config *cfg, bool validate)
sizeof(struct regexp_module_item));
cur_item->symbol = ucl_object_key(value);
cur_item->magic = rspamd_regexp_cb_magic;
+ cur_item->expression_flags = 0;
ud.conf_obj = NULL;
ud.cfg = cfg;
if (!read_regexp_expression(cfg->cfg_pool,
cur_item, ucl_object_key(value),
- ucl_obj_tostring(value), &ud)) {
+ ucl_obj_tostring(value), &ud, 0)) {
if (validate) {
return FALSE;
}
@@ -193,6 +241,7 @@ int regexp_module_config(struct rspamd_config *cfg, bool validate)
cur_item->magic = rspamd_regexp_cb_magic;
cur_item->symbol = ucl_object_key(value);
cur_item->lua_function = ucl_object_toclosure(value);
+ cur_item->expression_flags = 0;
rspamd_symcache_add_symbol(cfg->cache,
cur_item->symbol,
@@ -222,12 +271,17 @@ int regexp_module_config(struct rspamd_config *cfg, bool validate)
sizeof(struct regexp_module_item));
cur_item->symbol = ucl_object_key(value);
cur_item->magic = rspamd_regexp_cb_magic;
+ cur_item->expression_flags = 0; /* Will be overwritten with parsed flags */
ud.cfg = cfg;
ud.conf_obj = value;
+ /* Look for expression_flags */
+ const ucl_object_t *flags_obj = ucl_object_lookup(value, "expression_flags");
+ int expr_flags = parse_expression_flags(flags_obj);
+
if (!read_regexp_expression(cfg->cfg_pool,
cur_item, ucl_object_key(value),
- ucl_obj_tostring(elt), &ud)) {
+ ucl_obj_tostring(elt), &ud, expr_flags)) {
if (validate) {
return FALSE;
}
@@ -253,6 +307,7 @@ int regexp_module_config(struct rspamd_config *cfg, bool validate)
cur_item->magic = rspamd_regexp_cb_magic;
cur_item->symbol = ucl_object_key(value);
cur_item->lua_function = ucl_object_toclosure(value);
+ cur_item->expression_flags = 0;
}
if (cur_item && (is_lua || valid_expression)) {
@@ -548,7 +603,7 @@ process_regexp_item(struct rspamd_task *task,
else {
/* Process expression */
if (item->expr) {
- res = rspamd_process_expression(item->expr, 0, task);
+ res = rspamd_process_expression(item->expr, item->expression_flags, task);
}
else {
msg_warn_task("FIXME: %s symbol is broken with new expressions",