Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/chartable.cxx | 24
-rw-r--r-- | src/plugins/fuzzy_check.c | 178
-rw-r--r-- | src/plugins/lua/arc.lua | 204
-rw-r--r-- | src/plugins/lua/bayes_expiry.lua | 182
-rw-r--r-- | src/plugins/lua/contextal.lua | 2
-rw-r--r-- | src/plugins/lua/fuzzy_collect.lua | 2
-rw-r--r-- | src/plugins/lua/gpt.lua | 77
-rw-r--r-- | src/plugins/lua/hfilter.lua | 1
-rw-r--r-- | src/plugins/lua/history_redis.lua | 6
-rw-r--r-- | src/plugins/lua/known_senders.lua | 62
-rw-r--r-- | src/plugins/lua/milter_headers.lua | 2
-rw-r--r-- | src/plugins/lua/mime_types.lua | 6
-rw-r--r-- | src/plugins/lua/multimap.lua | 826
-rw-r--r-- | src/plugins/lua/ratelimit.lua | 6
-rw-r--r-- | src/plugins/lua/rbl.lua | 2
-rw-r--r-- | src/plugins/lua/replies.lua | 26
-rw-r--r-- | src/plugins/lua/reputation.lua | 113
-rw-r--r-- | src/plugins/lua/settings.lua | 6
-rw-r--r-- | src/plugins/lua/spamassassin.lua | 65
-rw-r--r-- | src/plugins/lua/trie.lua | 12
-rw-r--r-- | src/plugins/regexp.c | 63 |
21 files changed, 1529 insertions, 336 deletions
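The largest functional change below is in fuzzy_check.c: the single "servers" upstream list per rule is split into "read_servers" (used for check operations) and "write_servers" (used for learn/unlearn operations); defining only one of the new keys, or keeping the legacy "servers" key, makes the same list serve both roles. A minimal configuration sketch, assuming the usual fuzzy_check rule layout and hypothetical host names (ports are optional, the module falls back to its DEFAULT_PORT), could look like this:

    fuzzy_check {
      rule "local" {
        symbol = "LOCAL_FUZZY";
        # checks (read-only operations) go to the read-only replicas
        read_servers = "fuzzy-ro.example.com";
        # learn/unlearn requests go to the primary storage
        write_servers = "fuzzy-rw.example.com";
        # legacy form, equivalent to setting both keys to the same list:
        # servers = "fuzzy.example.com";
      }
    }

In the hunks that follow, symbol checks pick an upstream from read_servers, while the controller learn path and the Lua learn helper pick from write_servers.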
diff --git a/src/plugins/chartable.cxx b/src/plugins/chartable.cxx index a5c7cb899..c82748862 100644 --- a/src/plugins/chartable.cxx +++ b/src/plugins/chartable.cxx @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1696,7 +1696,7 @@ rspamd_can_alias_latin(int ch) static double rspamd_chartable_process_word_utf(struct rspamd_task *task, - rspamd_stat_token_t *w, + rspamd_word_t *w, gboolean is_url, unsigned int *ncap, struct chartable_ctx *chartable_module_ctx, @@ -1842,7 +1842,7 @@ rspamd_chartable_process_word_utf(struct rspamd_task *task, static double rspamd_chartable_process_word_ascii(struct rspamd_task *task, - rspamd_stat_token_t *w, + rspamd_word_t *w, gboolean is_url, struct chartable_ctx *chartable_module_ctx) { @@ -1931,17 +1931,17 @@ rspamd_chartable_process_part(struct rspamd_task *task, struct chartable_ctx *chartable_module_ctx, gboolean ignore_diacritics) { - rspamd_stat_token_t *w; + rspamd_word_t *w; unsigned int i, ncap = 0; double cur_score = 0.0; - if (part == nullptr || part->utf_words == nullptr || - part->utf_words->len == 0 || part->nwords == 0) { + if (part == nullptr || part->utf_words.a == nullptr || + kv_size(part->utf_words) == 0 || part->nwords == 0) { return FALSE; } - for (i = 0; i < part->utf_words->len; i++) { - w = &g_array_index(part->utf_words, rspamd_stat_token_t, i); + for (i = 0; i < kv_size(part->utf_words); i++) { + w = &kv_A(part->utf_words, i); if ((w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT)) { @@ -2015,13 +2015,13 @@ chartable_symbol_callback(struct rspamd_task *task, ignore_diacritics = TRUE; } - if (task->meta_words != nullptr && task->meta_words->len > 0) { - rspamd_stat_token_t *w; + if (task->meta_words.a && kv_size(task->meta_words) > 0) { + rspamd_word_t *w; double cur_score = 0; - gsize arlen = task->meta_words->len; + gsize arlen = kv_size(task->meta_words); for (i = 0; i < arlen; i++) { - w = &g_array_index(task->meta_words, rspamd_stat_token_t, i); + w = &kv_A(task->meta_words, i); cur_score += rspamd_chartable_process_word_utf(task, w, FALSE, nullptr, chartable_module_ctx, ignore_diacritics); } diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index ece9a91e0..7dd5162ac 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -78,7 +78,8 @@ enum fuzzy_rule_mode { }; struct fuzzy_rule { - struct upstream_list *servers; + struct upstream_list *read_servers; /* Servers for read operations */ + struct upstream_list *write_servers; /* Servers for write operations */ const char *symbol; const char *algorithm_str; const char *name; @@ -543,22 +544,68 @@ fuzzy_parse_rule(struct rspamd_config *cfg, const ucl_object_t *obj, } if ((value = ucl_object_lookup(obj, "servers")) != NULL) { - rule->servers = rspamd_upstreams_create(cfg->ups_ctx); - /* pass max_error and revive_time configuration in upstream for fuzzy storage - * it allows to configure error_rate threshold and upstream dead timer - */ - rspamd_upstreams_set_limits(rule->servers, + rule->read_servers = rspamd_upstreams_create(cfg->ups_ctx); + rspamd_upstreams_set_limits(rule->read_servers, (double) fuzzy_module_ctx->revive_time, NAN, NAN, NAN, (unsigned int) fuzzy_module_ctx->max_errors, 0); rspamd_mempool_add_destructor(cfg->cfg_pool, (rspamd_mempool_destruct_t) rspamd_upstreams_destroy, - rule->servers); - if (!rspamd_upstreams_from_ucl(rule->servers, value, DEFAULT_PORT, NULL)) { + rule->read_servers); + if (!rspamd_upstreams_from_ucl(rule->read_servers, value, DEFAULT_PORT, NULL)) { msg_err_config("cannot read servers definition"); return -1; } + + rule->write_servers = rule->read_servers; + } + else { + /* Check for read_servers and write_servers */ + gboolean has_read = FALSE, has_write = FALSE; + + if ((value = ucl_object_lookup(obj, "read_servers")) != NULL) { + rule->read_servers = rspamd_upstreams_create(cfg->ups_ctx); + rspamd_upstreams_set_limits(rule->read_servers, + (double) fuzzy_module_ctx->revive_time, NAN, NAN, NAN, + (unsigned int) fuzzy_module_ctx->max_errors, 0); + + rspamd_mempool_add_destructor(cfg->cfg_pool, + (rspamd_mempool_destruct_t) rspamd_upstreams_destroy, + rule->read_servers); + if (!rspamd_upstreams_from_ucl(rule->read_servers, value, DEFAULT_PORT, NULL)) { + msg_err_config("cannot read read_servers definition"); + return -1; + } + has_read = TRUE; + } + + if ((value = ucl_object_lookup(obj, "write_servers")) != NULL) { + rule->write_servers = rspamd_upstreams_create(cfg->ups_ctx); + rspamd_upstreams_set_limits(rule->write_servers, + (double) fuzzy_module_ctx->revive_time, NAN, NAN, NAN, + (unsigned int) fuzzy_module_ctx->max_errors, 0); + + rspamd_mempool_add_destructor(cfg->cfg_pool, + (rspamd_mempool_destruct_t) rspamd_upstreams_destroy, + rule->write_servers); + if (!rspamd_upstreams_from_ucl(rule->write_servers, value, DEFAULT_PORT, NULL)) { + msg_err_config("cannot read write_servers definition"); + return -1; + } + has_write = TRUE; + } + + /* If we have both read and write servers, we don't need the common servers list */ + if (has_read && !has_write) { + /* Use read_servers for all operations */ + rule->write_servers = rule->read_servers; + } + else if (has_write && !has_read) { + /* Use write_servers for all operations */ + rule->read_servers = rule->write_servers; + } } + if ((value = ucl_object_lookup(obj, "fuzzy_map")) != NULL) { it = NULL; while ((cur = ucl_object_iterate(value, &it, true)) != NULL) { @@ -636,7 +683,7 @@ fuzzy_parse_rule(struct rspamd_config *cfg, const ucl_object_t *obj, strlen(shingles_key_str), NULL, 0); rule->shingles_key->len = 16; - if (rspamd_upstreams_count(rule->servers) == 0) { + if (rspamd_upstreams_count(rule->read_servers) == 0) { msg_err_config("no servers defined for fuzzy rule with name: %s", rule->name); return -1; @@ -898,6 +945,24 @@ int fuzzy_check_module_init(struct 
rspamd_config *cfg, struct module_ctx **ctx) 0); rspamd_rcl_add_doc_by_path(cfg, "fuzzy_check.rule", + "List of servers to check (read-only operations)", + "read_servers", + UCL_STRING, + NULL, + 0, + NULL, + 0); + rspamd_rcl_add_doc_by_path(cfg, + "fuzzy_check.rule", + "List of servers to learn (write operations)", + "write_servers", + UCL_STRING, + NULL, + 0, + NULL, + 0); + rspamd_rcl_add_doc_by_path(cfg, + "fuzzy_check.rule", "If true then never try to learn this fuzzy storage", "read_only", UCL_BOOLEAN, @@ -1249,7 +1314,7 @@ int fuzzy_check_module_config(struct rspamd_config *cfg, bool validate) LL_FOREACH(value, cur) { - if (ucl_object_lookup(cur, "servers")) { + if (ucl_object_lookup_any(cur, "servers", "read_servers", "write_servers", NULL) != NULL) { /* Unnamed rule */ fuzzy_parse_rule(cfg, cur, NULL, cb_id); nrules++; @@ -1366,10 +1431,10 @@ fuzzy_io_fin(void *ud) close(session->fd); } -static GArray * +static rspamd_words_t * fuzzy_preprocess_words(struct rspamd_mime_text_part *part, rspamd_mempool_t *pool) { - return part->utf_words; + return &part->utf_words; } static void @@ -1715,26 +1780,30 @@ fuzzy_cmd_write_extensions(struct rspamd_task *task, struct rspamd_email_address *addr = g_ptr_array_index(MESSAGE_FIELD(task, from_mime), 0); - unsigned int to_write = MIN(MAX_FUZZY_DOMAIN, addr->domain_len) + 2; - if (to_write > 0 && to_write <= available) { - *dest++ = RSPAMD_FUZZY_EXT_SOURCE_DOMAIN; - *dest++ = to_write - 2; + if (addr->domain_len > 0) { + /* Filter invalid domains */ + unsigned int to_write = MIN(MAX_FUZZY_DOMAIN, addr->domain_len) + 2; - if (addr->domain_len < MAX_FUZZY_DOMAIN) { - memcpy(dest, addr->domain, addr->domain_len); - dest += addr->domain_len; - } - else { - /* Trim from left */ - memcpy(dest, - addr->domain + (addr->domain_len - MAX_FUZZY_DOMAIN), - MAX_FUZZY_DOMAIN); - dest += MAX_FUZZY_DOMAIN; - } + if (to_write > 0 && to_write <= available) { + *dest++ = RSPAMD_FUZZY_EXT_SOURCE_DOMAIN; + *dest++ = to_write - 2; + + if (addr->domain_len < MAX_FUZZY_DOMAIN) { + memcpy(dest, addr->domain, addr->domain_len); + dest += addr->domain_len; + } + else { + /* Trim from left */ + memcpy(dest, + addr->domain + (addr->domain_len - MAX_FUZZY_DOMAIN), + MAX_FUZZY_DOMAIN); + dest += MAX_FUZZY_DOMAIN; + } - available -= to_write; - written += to_write; + available -= to_write; + written += to_write; + } } } @@ -1792,7 +1861,7 @@ fuzzy_cmd_from_text_part(struct rspamd_task *task, unsigned int i; rspamd_cryptobox_hash_state_t st; rspamd_stat_token_t *word; - GArray *words; + rspamd_words_t *words; struct fuzzy_cmd_io *io; unsigned int additional_length; unsigned char *additional_data; @@ -1901,10 +1970,10 @@ fuzzy_cmd_from_text_part(struct rspamd_task *task, rspamd_cryptobox_hash_init(&st, rule->hash_key->str, rule->hash_key->len); words = fuzzy_preprocess_words(part, task->task_pool); - for (i = 0; i < words->len; i++) { - word = &g_array_index(words, rspamd_stat_token_t, i); + for (i = 0; i < kv_size(*words); i++) { + word = &kv_A(*words, i); - if (!((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) || word->stemmed.len == 0)) { + if (!((word->flags & RSPAMD_WORD_FLAG_SKIPPED) || word->stemmed.len == 0)) { rspamd_cryptobox_hash_update(&st, word->stemmed.begin, word->stemmed.len); } @@ -2615,7 +2684,7 @@ fuzzy_insert_metric_results(struct rspamd_task *task, struct fuzzy_rule *rule, if (task->message) { PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, tp) { - if (!IS_TEXT_PART_EMPTY(tp) && tp->utf_words != NULL && tp->utf_words->len > 0) { + if 
(!IS_TEXT_PART_EMPTY(tp) && kv_size(tp->utf_words) > 0) { seen_text_part = TRUE; if (tp->utf_stripped_text.magic == UTEXT_MAGIC) { @@ -3394,8 +3463,8 @@ register_fuzzy_client_call(struct rspamd_task *task, int sock; if (!rspamd_session_blocked(task->s)) { - /* Get upstream */ - selected = rspamd_upstream_get(rule->servers, RSPAMD_UPSTREAM_ROUND_ROBIN, + /* Get upstream - use read_servers for check operations */ + selected = rspamd_upstream_get(rule->read_servers, RSPAMD_UPSTREAM_ROUND_ROBIN, NULL, 0); if (selected) { addr = rspamd_upstream_addr_next(selected); @@ -3522,9 +3591,8 @@ register_fuzzy_controller_call(struct rspamd_http_connection_entry *entry, int sock; int ret = -1; - /* Get upstream */ - - while ((selected = rspamd_upstream_get_forced(rule->servers, + /* Get upstream - use write_servers for learn/unlearn operations */ + while ((selected = rspamd_upstream_get_forced(rule->write_servers, RSPAMD_UPSTREAM_SEQUENTIAL, NULL, 0))) { /* Create UDP socket */ addr = rspamd_upstream_addr_next(selected); @@ -3538,6 +3606,9 @@ register_fuzzy_controller_call(struct rspamd_http_connection_entry *entry, rspamd_upstream_fail(selected, TRUE, strerror(errno)); } else { + msg_info_task("fuzzy storage %s (%s rule) is used for write", + rspamd_inet_address_to_string_pretty(addr), + rule->name); s = rspamd_mempool_alloc0(session->pool, sizeof(struct fuzzy_learn_session)); @@ -3620,6 +3691,7 @@ fuzzy_modify_handler(struct rspamd_http_connection_entry *conn_ent, PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule) { if (rule->mode == fuzzy_rule_read_only) { + msg_debug_task("skip rule %s as it is read-only", rule->name); continue; } @@ -3729,6 +3801,8 @@ fuzzy_modify_handler(struct rspamd_http_connection_entry *conn_ent, else { commands = fuzzy_generate_commands(task, rule, cmd, flag, value, flags); + msg_debug_task("fuzzy command %d for rule %s, flag %d, value %d", + cmd, rule->name, flag, value); if (commands != NULL) { res = register_fuzzy_controller_call(conn_ent, rule, @@ -3894,7 +3968,7 @@ fuzzy_check_send_lua_learn(struct fuzzy_rule *rule, /* Get upstream */ if (!rspamd_session_blocked(task->s)) { - while ((selected = rspamd_upstream_get(rule->servers, + while ((selected = rspamd_upstream_get(rule->write_servers, RSPAMD_UPSTREAM_SEQUENTIAL, NULL, 0))) { /* Create UDP socket */ addr = rspamd_upstream_addr_next(selected); @@ -4491,9 +4565,21 @@ fuzzy_lua_list_storages(lua_State *L) lua_setfield(L, -2, "read_only"); /* Push servers */ - lua_createtable(L, rspamd_upstreams_count(rule->servers), 0); - rspamd_upstreams_foreach(rule->servers, lua_upstream_str_inserter, L); - lua_setfield(L, -2, "servers"); + if (rule->read_servers == rule->write_servers) { + /* Same servers for both operations */ + lua_createtable(L, rspamd_upstreams_count(rule->read_servers), 0); + rspamd_upstreams_foreach(rule->read_servers, lua_upstream_str_inserter, L); + lua_setfield(L, -2, "servers"); + } + else { + /* Different servers for read and write */ + lua_createtable(L, rspamd_upstreams_count(rule->read_servers), 0); + rspamd_upstreams_foreach(rule->read_servers, lua_upstream_str_inserter, L); + lua_setfield(L, -2, "read_servers"); + lua_createtable(L, rspamd_upstreams_count(rule->write_servers), 0); + rspamd_upstreams_foreach(rule->write_servers, lua_upstream_str_inserter, L); + lua_setfield(L, -2, "write_servers"); + } /* Push flags */ GHashTableIter it; @@ -4780,7 +4866,7 @@ fuzzy_lua_ping_storage(lua_State *L) rspamd_ptr_array_free_hard, addrs); } else { - struct upstream *selected = 
rspamd_upstream_get(rule_found->servers, + struct upstream *selected = rspamd_upstream_get(rule_found->read_servers, RSPAMD_UPSTREAM_ROUND_ROBIN, NULL, 0); addr = rspamd_upstream_addr_next(selected); } @@ -4824,4 +4910,4 @@ fuzzy_lua_ping_storage(lua_State *L) lua_pushboolean(L, TRUE); return 1; -}
\ No newline at end of file +} diff --git a/src/plugins/lua/arc.lua b/src/plugins/lua/arc.lua index fb5dd93e6..954583ed0 100644 --- a/src/plugins/lua/arc.lua +++ b/src/plugins/lua/arc.lua @@ -72,12 +72,13 @@ local settings = { use_domain = 'header', use_esld = true, use_redis = false, - key_prefix = 'arc_keys', -- default hash name - reuse_auth_results = false, -- Reuse the existing authentication results + key_prefix = 'arc_keys', -- default hash name + reuse_auth_results = false, -- Reuse the existing authentication results whitelisted_signers_map = nil, -- Trusted signers domains - adjust_dmarc = true, -- Adjust DMARC rejected policy for trusted forwarders - allowed_ids = nil, -- Allowed settings id - forbidden_ids = nil, -- Banned settings id + whitelist = nil, -- Domains with broken ARC implementations to trust despite validation failures + adjust_dmarc = true, -- Adjust DMARC rejected policy for trusted forwarders + allowed_ids = nil, -- Allowed settings id + forbidden_ids = nil, -- Banned settings id } -- To match normal AR @@ -86,15 +87,15 @@ local ar_settings = lua_auth_results.default_settings local function parse_arc_header(hdr, target, is_aar) -- Split elements by ';' and trim spaces local arr = fun.totable(fun.map( - function(val) - return fun.totable(fun.map(lua_util.rspamd_str_trim, - fun.filter(function(v) - return v and #v > 0 - end, - lua_util.rspamd_str_split(val.decoded, ';') - ) - )) - end, hdr + function(val) + return fun.totable(fun.map(lua_util.rspamd_str_trim, + fun.filter(function(v) + return v and #v > 0 + end, + lua_util.rspamd_str_split(val.decoded, ';') + ) + )) + end, hdr )) -- v[1] is the key and v[2] is the value @@ -115,11 +116,11 @@ local function parse_arc_header(hdr, target, is_aar) if not is_aar then -- For normal ARC headers we split by kv pair, like k=v fun.each(function(v) - fill_arc_header_table(v, target[i]) - end, - fun.map(function(elt) - return lua_util.rspamd_str_split(elt, '=') - end, elts) + fill_arc_header_table(v, target[i]) + end, + fun.map(function(elt) + return lua_util.rspamd_str_split(elt, '=') + end, elts) ) else -- For AAR we check special case of i=%d and pass everything else to @@ -147,7 +148,7 @@ local function parse_arc_header(hdr, target, is_aar) -- sort by i= attribute table.sort(target, function(a, b) - return (a.i or 0) < (b.i or 0) + return (tonumber(a.i) or 0) < (tonumber(b.i) or 0) end) end @@ -156,14 +157,14 @@ local function arc_validate_seals(task, seals, sigs, seal_headers, sig_headers) for i = 1, #seals do if (sigs[i].i or 0) ~= i then fail_reason = string.format('bad i for signature: %d, expected %d; d=%s', - sigs[i].i, i, sigs[i].d) + sigs[i].i, i, sigs[i].d) rspamd_logger.infox(task, fail_reason) task:insert_result(arc_symbols['invalid'], 1.0, fail_reason) return false, fail_reason end if (seals[i].i or 0) ~= i then fail_reason = string.format('bad i for seal: %d, expected %d; d=%s', - seals[i].i, i, seals[i].d) + seals[i].i, i, seals[i].d) rspamd_logger.infox(task, fail_reason) task:insert_result(arc_symbols['invalid'], 1.0, fail_reason) return false, fail_reason @@ -207,7 +208,7 @@ local function arc_callback(task) if #arc_sig_headers ~= #arc_seal_headers then -- We mandate that count of seals is equal to count of signatures rspamd_logger.infox(task, 'number of seals (%s) is not equal to number of signatures (%s)', - #arc_seal_headers, #arc_sig_headers) + #arc_seal_headers, #arc_sig_headers) task:insert_result(arc_symbols['invalid'], 1.0, 'invalid count of seals and signatures') return end @@ -249,7 +250,7 @@ 
local function arc_callback(task) -- Now check sanity of what we have local valid, validation_error = arc_validate_seals(task, cbdata.seals, cbdata.sigs, - arc_seal_headers, arc_sig_headers) + arc_seal_headers, arc_sig_headers) if not valid then task:cache_set('arc-failure', validation_error) return @@ -267,12 +268,20 @@ local function arc_callback(task) local function gen_arc_seal_cb(index, sig) return function(_, res, err, domain) lua_util.debugm(N, task, 'checked arc seal: %s(%s), %s processed', - res, err, index) + res, err, index) if not res then - cbdata.res = 'fail' - if err and domain then - table.insert(cbdata.errors, string.format('sig:%s:%s', domain, err)) + -- Check if this domain is whitelisted for broken ARC implementations + if settings.whitelist and domain and settings.whitelist:get_key(domain) then + rspamd_logger.infox(task, 'ARC seal validation failed for whitelisted domain %s, treating as valid: %s', + domain, err) + lua_util.debugm(N, task, 'whitelisted domain %s ARC seal failure ignored', domain) + res = true -- Treat as valid to continue the chain + else + cbdata.res = 'fail' + if err and domain then + table.insert(cbdata.errors, string.format('sig:%s:%s', domain, err)) + end end end @@ -283,7 +292,7 @@ local function arc_callback(task) local cur_aar = cbdata.ars[index] if not cur_aar then rspamd_logger.warnx(task, "cannot find Arc-Authentication-Results for trusted " .. - "forwarder %s on i=%s", domain, cbdata.index) + "forwarder %s on i=%s", domain, cbdata.index) else task:cache_set(AR_TRUSTED_CACHE_KEY, cur_aar) local seen_dmarc @@ -309,20 +318,20 @@ local function arc_callback(task) end end task:insert_result(arc_symbols.trusted_allow, mult, - string.format('%s:s=%s:i=%d', domain, sig.s, index)) + string.format('%s:s=%s:i=%d', domain, sig.s, index)) end end if index == #arc_sig_headers then if cbdata.res == 'success' then local arc_allow_result = string.format('%s:s=%s:i=%d', - domain, sig.s, index) + domain, sig.s, index) task:insert_result(arc_symbols.allow, 1.0, arc_allow_result) task:cache_set('arc-allow', arc_allow_result) else task:insert_result(arc_symbols.reject, 1.0, - rspamd_logger.slog('seal check failed: %s, %s', cbdata.res, - cbdata.errors)) + rspamd_logger.slog('seal check failed: %s, %s', cbdata.res, + cbdata.errors)) end end end @@ -330,12 +339,20 @@ local function arc_callback(task) local function arc_signature_cb(_, res, err, domain) lua_util.debugm(N, task, 'checked arc signature %s: %s(%s)', - domain, res, err) + domain, res, err) if not res then - cbdata.res = 'fail' - if err and domain then - table.insert(cbdata.errors, string.format('sig:%s:%s', domain, err)) + -- Check if this domain is whitelisted for broken ARC implementations + if settings.whitelist and domain and settings.whitelist:get_key(domain) then + rspamd_logger.infox(task, 'ARC signature validation failed for whitelisted domain %s, treating as valid: %s', + domain, err) + lua_util.debugm(N, task, 'whitelisted domain %s ARC signature failure ignored', domain) + res = true -- Treat as valid to continue the chain + else + cbdata.res = 'fail' + if err and domain then + table.insert(cbdata.errors, string.format('sig:%s:%s', domain, err)) + end end end if cbdata.res == 'success' then @@ -343,17 +360,24 @@ local function arc_callback(task) for i, sig in ipairs(cbdata.seals) do local ret, lerr = dkim_verify(task, sig.header, gen_arc_seal_cb(i, sig), 'arc-seal') if not ret then - cbdata.res = 'fail' - table.insert(cbdata.errors, string.format('seal:%s:s=%s:i=%s:%s', + -- Check if this 
domain is whitelisted for broken ARC implementations + if settings.whitelist and sig.d and settings.whitelist:get_key(sig.d) then + rspamd_logger.infox(task, 'ARC seal dkim_verify failed for whitelisted domain %s, treating as valid: %s', + sig.d, lerr) + lua_util.debugm(N, task, 'whitelisted domain %s ARC seal dkim_verify failure ignored', sig.d) + else + cbdata.res = 'fail' + table.insert(cbdata.errors, string.format('seal:%s:s=%s:i=%s:%s', sig.d or '', sig.s or '', sig.i or '', lerr)) - lua_util.debugm(N, task, 'checked arc seal %s: %s(%s), %s processed', + lua_util.debugm(N, task, 'checked arc seal %s: %s(%s), %s processed', sig.d, ret, lerr, i) + end end end else task:insert_result(arc_symbols['reject'], 1.0, - rspamd_logger.slog('signature check failed: %s, %s', cbdata.res, - cbdata.errors)) + rspamd_logger.slog('signature check failed: %s, %s', cbdata.res, + cbdata.errors)) end end @@ -397,25 +421,33 @@ local function arc_callback(task) is "fail" and the algorithm stops here. 9. If the algorithm reaches this step, then the Chain Validation Status is "pass", and the algorithm is complete. - ]]-- + ]] -- local processed = 0 local sig = cbdata.sigs[#cbdata.sigs] -- last AMS local ret, err = dkim_verify(task, sig.header, arc_signature_cb, 'arc-sign') if not ret then - cbdata.res = 'fail' - table.insert(cbdata.errors, string.format('sig:%s:%s', sig.d or '', err)) + -- Check if this domain is whitelisted for broken ARC implementations + if settings.whitelist and sig.d and settings.whitelist:get_key(sig.d) then + rspamd_logger.infox(task, 'ARC signature dkim_verify failed for whitelisted domain %s, treating as valid: %s', + sig.d, err) + lua_util.debugm(N, task, 'whitelisted domain %s ARC signature dkim_verify failure ignored', sig.d) + processed = processed + 1 + else + cbdata.res = 'fail' + table.insert(cbdata.errors, string.format('sig:%s:%s', sig.d or '', err)) + end else processed = processed + 1 lua_util.debugm(N, task, 'processed arc signature %s[%s]: %s(%s), %s total', - sig.d, sig.i, ret, err, #cbdata.seals) + sig.d, sig.i, ret, err, #cbdata.seals) end if processed == 0 then task:insert_result(arc_symbols['reject'], 1.0, - rspamd_logger.slog('cannot verify %s of %s signatures: %s', - #arc_sig_headers - processed, #arc_sig_headers, cbdata.errors)) + rspamd_logger.slog('cannot verify %s of %s signatures: %s', + #arc_sig_headers - processed, #arc_sig_headers, cbdata.errors)) end end @@ -538,13 +570,13 @@ local function arc_sign_seal(task, params, header) for i = 1, #arc_seals, 1 do if arc_auth_results[i] then local s = dkim_canonicalize('ARC-Authentication-Results', - arc_auth_results[i].raw_header) + arc_auth_results[i].raw_header) sha_ctx:update(s) lua_util.debugm(N, task, 'update signature with header: %s', s) end if arc_sigs[i] then local s = dkim_canonicalize('ARC-Message-Signature', - arc_sigs[i].raw_header) + arc_sigs[i].raw_header) sha_ctx:update(s) lua_util.debugm(N, task, 'update signature with header: %s', s) end @@ -557,16 +589,16 @@ local function arc_sign_seal(task, params, header) end header = lua_util.fold_header(task, - 'ARC-Message-Signature', - header) + 'ARC-Message-Signature', + header) cur_auth_results = string.format('i=%d; %s', cur_idx, cur_auth_results) cur_auth_results = lua_util.fold_header(task, - 'ARC-Authentication-Results', - cur_auth_results, ';') + 'ARC-Authentication-Results', + cur_auth_results, ';') local s = dkim_canonicalize('ARC-Authentication-Results', - cur_auth_results) + cur_auth_results) sha_ctx:update(s) lua_util.debugm(N, task, 'update 
signature with header: %s', s) s = dkim_canonicalize('ARC-Message-Signature', header) @@ -574,10 +606,10 @@ local function arc_sign_seal(task, params, header) lua_util.debugm(N, task, 'update signature with header: %s', s) local cur_arc_seal = string.format('i=%d; s=%s; d=%s; t=%d; a=rsa-sha256; cv=%s; b=', - cur_idx, - params.selector, - params.domain, - math.floor(rspamd_util.get_time()), params.arc_cv) + cur_idx, + params.selector, + params.domain, + math.floor(rspamd_util.get_time()), params.arc_cv) s = string.format('%s:%s', 'arc-seal', cur_arc_seal) sha_ctx:update(s) lua_util.debugm(N, task, 'initial update signature with header: %s', s) @@ -591,20 +623,23 @@ local function arc_sign_seal(task, params, header) local sig = rspamd_rsa.sign_memory(privkey, sha_ctx:bin()) cur_arc_seal = string.format('%s%s', cur_arc_seal, - sig:base64(70, nl_type)) + sig:base64(70, nl_type)) lua_mime.modify_headers(task, { add = { ['ARC-Authentication-Results'] = { order = 1, value = cur_auth_results }, ['ARC-Message-Signature'] = { order = 1, value = header }, - ['ARC-Seal'] = { order = 1, value = lua_util.fold_header(task, - 'ARC-Seal', cur_arc_seal) } + ['ARC-Seal'] = { + order = 1, + value = lua_util.fold_header(task, + 'ARC-Seal', cur_arc_seal) + } }, -- RFC requires a strict order for these headers to be inserted order = { 'ARC-Authentication-Results', 'ARC-Message-Signature', 'ARC-Seal' }, }) task:insert_result(settings.sign_symbol, 1.0, - string.format('%s:s=%s:i=%d', params.domain, params.selector, cur_idx)) + string.format('%s:s=%s:i=%d', params.domain, params.selector, cur_idx)) end local function prepare_arc_selector(task, sel) @@ -668,7 +703,6 @@ local function prepare_arc_selector(task, sel) else default_arc_cv() end - end return true @@ -695,19 +729,18 @@ local function do_sign(task, sign_params) sign_params.pubkey = results[1] sign_params.strict_pubkey_check = not settings.allow_pubkey_mismatch elseif not settings.allow_pubkey_mismatch then - rspamd_logger.errx('public key for domain %s/%s is not found: %s, skip signing', - sign_params.domain, sign_params.selector, err) + rspamd_logger.errx(task, 'public key for domain %s/%s is not found: %s, skip signing', + sign_params.domain, sign_params.selector, err) return else - rspamd_logger.infox('public key for domain %s/%s is not found: %s', - sign_params.domain, sign_params.selector, err) + rspamd_logger.infox(task, 'public key for domain %s/%s is not found: %s', + sign_params.domain, sign_params.selector, err) end local dret, hdr = dkim_sign(task, sign_params) if dret then arc_sign_seal(task, sign_params, hdr) end - end, forced = true }) @@ -768,6 +801,31 @@ end dkim_sign_tools.process_signing_settings(N, settings, opts) +-- Process ARC-specific maps that aren't handled by dkim_sign_tools +local lua_maps = require "lua_maps" + +if opts.whitelisted_signers_map then + settings.whitelisted_signers_map = lua_maps.map_add_from_ucl(opts.whitelisted_signers_map, 'set', + 'ARC trusted signers domains') + if not settings.whitelisted_signers_map then + rspamd_logger.errx(rspamd_config, 'cannot load whitelisted_signers_map') + settings.whitelisted_signers_map = nil + else + rspamd_logger.infox(rspamd_config, 'loaded ARC whitelisted signers map') + end +end + +if opts.whitelist then + settings.whitelist = lua_maps.map_add_from_ucl(opts.whitelist, 'set', + 'ARC domains with broken implementations') + if not settings.whitelist then + rspamd_logger.errx(rspamd_config, 'cannot load ARC whitelist map') + settings.whitelist = nil + else + 
rspamd_logger.infox(rspamd_config, 'loaded ARC whitelist map') + end +end + if not dkim_sign_tools.validate_signing_settings(settings) then rspamd_logger.infox(rspamd_config, 'mandatory parameters missing, disable arc signing') return @@ -780,7 +838,7 @@ if ar_opts and ar_opts.routines then if routines['authentication-results'] then ar_settings = lua_util.override_defaults(ar_settings, - routines['authentication-results']) + routines['authentication-results']) end end @@ -789,7 +847,7 @@ if settings.use_redis then if not redis_params then rspamd_logger.errx(rspamd_config, 'no servers are specified, ' .. - 'but module is configured to load keys from redis, disable arc signing') + 'but module is configured to load keys from redis, disable arc signing') return end @@ -845,9 +903,9 @@ if settings.adjust_dmarc and settings.whitelisted_signers_map then local dmarc_fwd = ar.dmarc if dmarc_fwd == 'pass' then rspamd_logger.infox(task, "adjust dmarc reject score as trusted forwarder " - .. "proved DMARC validity for %s", ar['header.from']) + .. "proved DMARC validity for %s", ar['header.from']) task:adjust_result(sym_to_adjust, 0.1, - 'ARC trusted') + 'ARC trusted') end end end diff --git a/src/plugins/lua/bayes_expiry.lua b/src/plugins/lua/bayes_expiry.lua index 44ff9dafa..0d78f2272 100644 --- a/src/plugins/lua/bayes_expiry.lua +++ b/src/plugins/lua/bayes_expiry.lua @@ -41,32 +41,38 @@ local template = {} local function check_redis_classifier(cls, cfg) -- Skip old classifiers if cls.new_schema then - local symbol_spam, symbol_ham + local class_symbols = {} + local class_labels = {} local expiry = (cls.expiry or cls.expire) if type(expiry) == 'table' then expiry = expiry[1] end - -- Load symbols from statfiles + -- Extract class_labels mapping from classifier config + if cls.class_labels then + class_labels = cls.class_labels + end + -- Load symbols from statfiles for multi-class support local function check_statfile_table(tbl, def_sym) local symbol = tbl.symbol or def_sym - - local spam - if tbl.spam then - spam = tbl.spam - else - if string.match(symbol:upper(), 'SPAM') then - spam = true + local class_name = tbl.class + + -- Handle legacy spam/ham detection for backward compatibility + if not class_name then + if tbl.spam ~= nil then + class_name = tbl.spam and 'spam' or 'ham' + elseif string.match(tostring(symbol):upper(), 'SPAM') then + class_name = 'spam' + elseif string.match(tostring(symbol):upper(), 'HAM') then + class_name = 'ham' else - spam = false + class_name = def_sym end end - if spam then - symbol_spam = symbol - else - symbol_ham = symbol + if class_name then + class_symbols[class_name] = symbol end end @@ -87,10 +93,9 @@ local function check_redis_classifier(cls, cfg) end end - if not symbol_spam or not symbol_ham or type(expiry) ~= 'number' then + if next(class_symbols) == nil or type(expiry) ~= 'number' then logger.debugm(N, rspamd_config, - 'disable expiry for classifier %s: no expiry %s', - symbol_spam, cls) + 'disable expiry for classifier: no class symbols or expiry configured') return end -- Now try to load redis_params if needed @@ -108,17 +113,16 @@ local function check_redis_classifier(cls, cfg) end if redis_params['read_only'] then - logger.infox(rspamd_config, 'disable expiry for classifier %s: read only redis configuration', - symbol_spam) + logger.infox(rspamd_config, 'disable expiry for classifier: read only redis configuration') return end - logger.debugm(N, rspamd_config, "enabled expiry for %s/%s -> %s expiry", - symbol_spam, symbol_ham, expiry) + 
logger.debugm(N, rspamd_config, "enabled expiry for classes %s -> %s expiry", + table.concat(lutil.keys(class_symbols), ', '), expiry) table.insert(settings.classifiers, { - symbol_spam = symbol_spam, - symbol_ham = symbol_ham, + class_symbols = class_symbols, + class_labels = class_labels, redis_params = redis_params, expiry = expiry }) @@ -249,12 +253,11 @@ local expiry_script = [[ local keys = ret[2] local tokens = {} - -- Tokens occurrences distribution counters + -- Dynamic occurrence tracking for all classes local occur = { - ham = {}, - spam = {}, total = {} } + local classes_found = {} -- Expiry step statistics counters local nelts, extended, discriminated, sum, sum_squares, common, significant, @@ -264,24 +267,44 @@ local expiry_script = [[ for _,key in ipairs(keys) do local t = redis.call('TYPE', key)["ok"] if t == 'hash' then - local values = redis.call('HMGET', key, 'H', 'S') - local ham = tonumber(values[1]) or 0 - local spam = tonumber(values[2]) or 0 + -- Get all hash fields to support multi-class + local hash_data = redis.call('HGETALL', key) + local class_counts = {} + local total = 0 local ttl = redis.call('TTL', key) + + -- Parse hash data into class counts + for i = 1, #hash_data, 2 do + local class_label = hash_data[i] + local count = tonumber(hash_data[i + 1]) or 0 + class_counts[class_label] = count + total = total + count + + -- Track classes we've seen + if not classes_found[class_label] then + classes_found[class_label] = true + occur[class_label] = {} + end + end + tokens[key] = { - ham, - spam, - ttl + class_counts = class_counts, + total = total, + ttl = ttl } - local total = spam + ham + sum = sum + total sum_squares = sum_squares + total * total nelts = nelts + 1 - for k,v in pairs({['ham']=ham, ['spam']=spam, ['total']=total}) do - if tonumber(v) > 19 then v = 20 end - occur[k][v] = occur[k][v] and occur[k][v] + 1 or 1 + -- Update occurrence counters for all classes and total + for class_label, count in pairs(class_counts) do + local bucket = count > 19 and 20 or count + occur[class_label][bucket] = (occur[class_label][bucket] or 0) + 1 end + + local total_bucket = total > 19 and 20 or total + occur.total[total_bucket] = (occur.total[total_bucket] or 0) + 1 end end @@ -293,9 +316,10 @@ local expiry_script = [[ end for key,token in pairs(tokens) do - local ham, spam, ttl = token[1], token[2], tonumber(token[3]) + local class_counts = token.class_counts + local total = token.total + local ttl = tonumber(token.ttl) local threshold = mean - local total = spam + ham local function set_ttl() if expire < 0 then @@ -310,14 +334,39 @@ local expiry_script = [[ return 0 end - if total == 0 or math.abs(ham - spam) <= total * ${epsilon_common} then + -- Check if token is common (balanced across classes) + local is_common = false + if total == 0 then + is_common = true + else + -- For multi-class, check if any class dominates significantly + local max_count = 0 + for _, count in pairs(class_counts) do + if count > max_count then + max_count = count + end + end + -- Token is common if no class has more than (1 - epsilon) of total + is_common = (max_count / total) <= (1 - ${epsilon_common}) + end + + if is_common then common = common + 1 if ttl > ${common_ttl} then discriminated = discriminated + 1 redis.call('EXPIRE', key, ${common_ttl}) end elseif total >= threshold and total > 0 then - if ham / total > ${significant_factor} or spam / total > ${significant_factor} then + -- Check if any class is significant + local is_significant = false + for _, count in 
pairs(class_counts) do + if count / total > ${significant_factor} then + is_significant = true + break + end + end + + if is_significant then significant = significant + 1 if ttl ~= -1 then redis.call('PERSIST', key) @@ -361,33 +410,50 @@ local expiry_script = [[ redis.call('DEL', lock_key) local occ_distr = {} - for _,cl in pairs({'ham', 'spam', 'total'}) do + + -- Process all classes found plus total + local all_classes = {'total'} + for class_label in pairs(classes_found) do + table.insert(all_classes, class_label) + end + + for _, cl in ipairs(all_classes) do local occur_key = pattern_sha1 .. '_occurrence_' .. cl if cursor ~= 0 then - local n - for i,v in ipairs(redis.call('HGETALL', occur_key)) do - if i % 2 == 1 then - n = tonumber(v) - else - occur[cl][n] = occur[cl][n] and occur[cl][n] + v or v + local existing_data = redis.call('HGETALL', occur_key) + if #existing_data > 0 then + for i = 1, #existing_data, 2 do + local bucket = tonumber(existing_data[i]) + local count = tonumber(existing_data[i + 1]) + if occur[cl] and occur[cl][bucket] then + occur[cl][bucket] = occur[cl][bucket] + count + elseif occur[cl] then + occur[cl][bucket] = count + end end end - local str = '' - if occur[cl][0] ~= nil then - str = '0:' .. occur[cl][0] .. ',' - end - for k,v in ipairs(occur[cl]) do - if k == 20 then k = '>19' end - str = str .. k .. ':' .. v .. ',' + if occur[cl] and next(occur[cl]) then + local str = '' + if occur[cl][0] then + str = '0:' .. occur[cl][0] .. ',' + end + for k = 1, 20 do + if occur[cl][k] then + local label = k == 20 and '>19' or tostring(k) + str = str .. label .. ':' .. occur[cl][k] .. ',' + end + end + table.insert(occ_distr, cl .. '=' .. str) + else + table.insert(occ_distr, cl .. '=no_data') end - table.insert(occ_distr, str) else redis.call('DEL', occur_key) end - if next(occur[cl]) ~= nil then + if occur[cl] and next(occur[cl]) then redis.call('HMSET', occur_key, unpack_function(hash2list(occur[cl]))) end end @@ -446,8 +512,8 @@ local function expire_step(cls, ev_base, worker) '%s infrequent (%s %s), %s mean, %s std', lutil.unpack(d)) if cycle then - for i, cl in ipairs({ 'in ham', 'in spam', 'total' }) do - logger.infox(rspamd_config, 'tokens occurrences, %s: {%s}', cl, occ_distr[i]) + for _, distr_info in ipairs(occ_distr) do + logger.infox(rspamd_config, 'tokens occurrences: {%s}', distr_info) end end end diff --git a/src/plugins/lua/contextal.lua b/src/plugins/lua/contextal.lua index 19b599e11..e29c21645 100644 --- a/src/plugins/lua/contextal.lua +++ b/src/plugins/lua/contextal.lua @@ -79,7 +79,7 @@ local wait_request_ttl = true local function maybe_defer(task, obj) if settings.defer_if_no_result and not ((obj or E)[1] or E).actions then - task:set_pre_result('soft reject', settings.defer_message) + task:set_pre_result('soft reject', settings.defer_message, N) end end diff --git a/src/plugins/lua/fuzzy_collect.lua b/src/plugins/lua/fuzzy_collect.lua index 132ace90c..060cc2fc2 100644 --- a/src/plugins/lua/fuzzy_collect.lua +++ b/src/plugins/lua/fuzzy_collect.lua @@ -34,7 +34,7 @@ local settings = { local function send_data_mirror(m, cfg, ev_base, body) local function store_callback(err, _, _, _) if err then - rspamd_logger.errx(cfg, 'cannot save data on %(%s): %s', m.server, m.name, err) + rspamd_logger.errx(cfg, 'cannot save data on %s(%s): %s', m.server, m.name, err) else rspamd_logger.infox(cfg, 'saved data on %s(%s)', m.server, m.name) end diff --git a/src/plugins/lua/gpt.lua b/src/plugins/lua/gpt.lua index 5d1cf5e06..331dbbce2 100644 --- 
a/src/plugins/lua/gpt.lua +++ b/src/plugins/lua/gpt.lua @@ -20,9 +20,9 @@ local E = {} if confighelp then rspamd_config:add_example(nil, 'gpt', - "Performs postfiltering using GPT model", - [[ -gpt { + "Performs postfiltering using GPT model", + [[ + gpt { # Supported types: openai, ollama type = "openai"; # Your key to access the API @@ -53,7 +53,7 @@ gpt { reason_header = "X-GPT-Reason"; # Use JSON format for response json = false; -} + } ]]) return end @@ -162,7 +162,7 @@ local function default_condition(task) end end lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s, - sym.weight, required_weight) + sym.weight, required_weight) else return false, 'skip as "' .. s .. '" is found' end @@ -182,7 +182,7 @@ local function default_condition(task) end end lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s, - sym.weight, required_weight) + sym.weight, required_weight) end else return false, 'skip as "' .. s .. '" is not found' @@ -253,6 +253,15 @@ local function maybe_extract_json(str) return nil end +-- Helper function to remove <think>...</think> and trim leading newlines +local function clean_gpt_response(text) + -- Remove <think>...</think> including multiline + text = text:gsub("<think>.-</think>", "") + -- Trim leading whitespace and newlines + text = text:gsub("^%s*\n*", "") + return text +end + local function default_openai_json_conversion(task, input) local parser = ucl.parser() local res, err = parser:parse_string(input) @@ -301,7 +310,7 @@ local function default_openai_json_conversion(task, input) elseif reply.probability == "low" then spam_score = 0.1 else - rspamd_logger.infox("cannot convert to spam probability: %s", reply.probability) + rspamd_logger.infox(task, "cannot convert to spam probability: %s", reply.probability) end end @@ -349,17 +358,25 @@ local function default_openai_plain_conversion(task, input) rspamd_logger.errx(task, 'no content in the first message') return end + + -- Clean message + first_message = clean_gpt_response(first_message) + local lines = lua_util.str_split(first_message, '\n') local first_line = clean_reply_line(lines[1]) local spam_score = tonumber(first_line) local reason = clean_reply_line(lines[2]) local categories = lua_util.str_split(clean_reply_line(lines[3]), ',') + if type(reply.usage) == 'table' then + rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens) + end + if spam_score then return spam_score, reason, categories end - rspamd_logger.errx(task, 'cannot parse plain gpt reply: %s (all: %s)', lines[1]) + rspamd_logger.errx(task, 'cannot parse plain gpt reply: %s (all: %s)', lines[1], first_message) return end @@ -387,6 +404,10 @@ local function default_ollama_plain_conversion(task, input) rspamd_logger.errx(task, 'no content in the first message') return end + + -- Clean message + first_message = clean_gpt_response(first_message) + local lines = lua_util.str_split(first_message, '\n') local first_line = clean_reply_line(lines[1]) local spam_score = tonumber(first_line) @@ -397,7 +418,7 @@ local function default_ollama_plain_conversion(task, input) return spam_score, reason, categories end - rspamd_logger.errx(task, 'cannot parse plain gpt reply: %s', lines[1]) + rspamd_logger.errx(task, 'cannot parse plain gpt reply: %s (all: %s)', lines[1], first_message) return end @@ -449,7 +470,7 @@ local function default_ollama_json_conversion(task, input) elseif reply.probability == "low" then spam_score = 0.1 else - rspamd_logger.infox("cannot convert to spam probability: %s", 
reply.probability) + rspamd_logger.infox(task, "cannot convert to spam probability: %s", reply.probability) end end @@ -477,7 +498,7 @@ local function redis_cache_key(sel_part) env_digest = digest:hex():sub(1, 4) end return string.format('%s_%s', env_digest, - sel_part:get_mimepart():get_digest():sub(1, 24)) + sel_part:get_mimepart():get_digest():sub(1, 24)) end local function process_categories(task, categories) @@ -514,9 +535,9 @@ local function insert_results(task, result, sel_part) end end if result.reason and settings.reason_header then - lua_mime.modify_headers(task, - { add = { [settings.reason_header] = { value = tostring(result.reason), order = 1 } } }) - end + lua_mime.modify_headers(task, + { add = { [settings.reason_header] = { value = tostring(result.reason), order = 1 } } }) + end if cache_context then lua_cache.cache_set(task, redis_cache_key(sel_part), result, cache_context) @@ -540,12 +561,12 @@ local function check_consensus_and_insert_results(task, results, sel_part) nspam = nspam + 1 max_spam_prob = math.max(max_spam_prob, result.probability) lua_util.debugm(N, task, "model: %s; spam: %s; reason: '%s'", - result.model, result.probability, result.reason) + result.model, result.probability, result.reason) else nham = nham + 1 max_ham_prob = math.min(max_ham_prob, result.probability) lua_util.debugm(N, task, "model: %s; ham: %s; reason: '%s'", - result.model, result.probability, result.reason) + result.model, result.probability, result.reason) end if result.reason then @@ -559,23 +580,22 @@ local function check_consensus_and_insert_results(task, results, sel_part) if nspam > nham and max_spam_prob > 0.75 then insert_results(task, { - probability = max_spam_prob, - reason = reason.reason, - categories = reason.categories, - }, - sel_part) + probability = max_spam_prob, + reason = reason.reason, + categories = reason.categories, + }, + sel_part) elseif nham > nspam and max_ham_prob < 0.25 then insert_results(task, { - probability = max_ham_prob, - reason = reason.reason, - categories = reason.categories, - }, - sel_part) + probability = max_ham_prob, + reason = reason.reason, + categories = reason.categories, + }, + sel_part) else -- No consensus lua_util.debugm(N, task, "no consensus") end - end local function get_meta_llm_content(task) @@ -674,7 +694,7 @@ local function openai_check(task, content, sel_part) }, { role = 'user', - content = 'Subject: ' .. task:get_subject() or '', + content = 'Subject: ' .. 
(task:get_subject() or ''), }, { role = 'user', @@ -726,7 +746,6 @@ local function openai_check(task, content, sel_part) if not rspamd_http.request(http_params) then results[idx].checked = true end - end end diff --git a/src/plugins/lua/hfilter.lua b/src/plugins/lua/hfilter.lua index a783565ab..32102e4f8 100644 --- a/src/plugins/lua/hfilter.lua +++ b/src/plugins/lua/hfilter.lua @@ -131,6 +131,7 @@ local checks_hellohost = [[ /modem[.-][0-9]/i 5 /[0-9][.-]?dhcp/i 5 /wifi[.-][0-9]/i 5 +/[.-]vps[.-]/i 1 ]] local checks_hellohost_map diff --git a/src/plugins/lua/history_redis.lua b/src/plugins/lua/history_redis.lua index a3fdb0ec4..44eb40ad9 100644 --- a/src/plugins/lua/history_redis.lua +++ b/src/plugins/lua/history_redis.lua @@ -138,7 +138,7 @@ end local function history_save(task) local function redis_llen_cb(err, _) if err then - rspamd_logger.errx(task, 'got error %s when writing history row: %s', + rspamd_logger.errx(task, 'got error %s when writing history row', err) end end @@ -188,7 +188,7 @@ local function handle_history_request(task, conn, from, to, reset) if reset then local function redis_ltrim_cb(err, _) if err then - rspamd_logger.errx(task, 'got error %s when resetting history: %s', + rspamd_logger.errx(task, 'got error %s when resetting history', err) conn:send_error(504, '{"error": "' .. err .. '"}') else @@ -258,7 +258,7 @@ local function handle_history_request(task, conn, from, to, reset) (rspamd_util:get_ticks() - t1) * 1000.0) collectgarbage() else - rspamd_logger.errx(task, 'got error %s when getting history: %s', + rspamd_logger.errx(task, 'got error %s when getting history', err) conn:send_error(504, '{"error": "' .. err .. '"}') end diff --git a/src/plugins/lua/known_senders.lua b/src/plugins/lua/known_senders.lua index 5cb2ddcf5..0cbf3cdcf 100644 --- a/src/plugins/lua/known_senders.lua +++ b/src/plugins/lua/known_senders.lua @@ -106,21 +106,26 @@ local function configure_scripts(_, _, _) -- script checks if given recipients are in the local replies set of the sender local redis_zscore_script = [[ local replies_recipients_addrs = ARGV - if replies_recipients_addrs then + if replies_recipients_addrs and #replies_recipients_addrs > 0 then + local found = false for _, rcpt in ipairs(replies_recipients_addrs) do local score = redis.call('ZSCORE', KEYS[1], rcpt) - -- check if score is nil (for some reason redis script does not see if score is a nil value) - if type(score) == 'boolean' then - score = nil - -- 0 is stand for failure code - return 0 + if score then + -- If we found at least one recipient, consider it a match + found = true + break end end - -- first number in return statement is stands for the success/failure code - -- where success code is 1 and failure code is 0 - return 1 + + if found then + -- Success code is 1 + return 1 + else + -- Failure code is 0 + return 0 + end else - -- 0 is a failure code + -- No recipients to check, failure code is 0 return 0 end ]] @@ -259,7 +264,13 @@ local function verify_local_replies_set(task) return nil end - local replies_recipients = task:get_recipients('mime') or E + local replies_recipients = task:get_recipients('smtp') or E + + -- If no recipients, don't proceed + if #replies_recipients == 0 then + lua_util.debugm(N, task, 'No recipients to verify') + return nil + end local replies_sender_string = lua_util.maybe_obfuscate_string(tostring(replies_sender), settings, settings.sender_prefix) @@ -268,13 +279,16 @@ local function verify_local_replies_set(task) local function redis_zscore_script_cb(err, data) if err ~= nil 
then rspamd_logger.errx(task, 'Could not verify %s local replies set %s', replies_sender_key, err) - end - if data ~= 1 then - lua_util.debugm(N, task, 'Recipients were not verified') return end - lua_util.debugm(N, task, 'Recipients were verified') - task:insert_result(settings.symbol_check_mail_local, 1.0, replies_sender_key) + + -- We need to ensure we're properly checking the result + if data == 1 then + lua_util.debugm(N, task, 'Recipients were verified') + task:insert_result(settings.symbol_check_mail_local, 1.0, replies_sender_key) + else + lua_util.debugm(N, task, 'Recipients were not verified, data=%s', data) + end end local replies_recipients_addrs = {} @@ -284,12 +298,24 @@ local function verify_local_replies_set(task) table.insert(replies_recipients_addrs, replies_recipients[i].addr) end - lua_util.debugm(N, task, 'Making redis request to local replies set') - lua_redis.exec_redis_script(zscore_script_id, + -- Only proceed if we have recipients to check + if #replies_recipients_addrs == 0 then + lua_util.debugm(N, task, 'No recipient addresses to verify') + return nil + end + + lua_util.debugm(N, task, 'Making redis request to local replies set with key %s and recipients %s', + replies_sender_key, table.concat(replies_recipients_addrs, ", ")) + + local ret = lua_redis.exec_redis_script(zscore_script_id, { task = task, is_write = true }, redis_zscore_script_cb, { replies_sender_key }, replies_recipients_addrs) + + if not ret then + rspamd_logger.errx(task, "redis script request wasn't scheduled") + end end local function check_known_incoming_mail_callback(task) diff --git a/src/plugins/lua/milter_headers.lua b/src/plugins/lua/milter_headers.lua index 2daeeed78..17fc90562 100644 --- a/src/plugins/lua/milter_headers.lua +++ b/src/plugins/lua/milter_headers.lua @@ -138,7 +138,7 @@ local function milter_headers(task) local function skip_wanted(hdr) if settings_override then - return true + return false end -- Normal checks local function match_extended_headers_rcpt() diff --git a/src/plugins/lua/mime_types.lua b/src/plugins/lua/mime_types.lua index c69fa1e7b..73cd63c6a 100644 --- a/src/plugins/lua/mime_types.lua +++ b/src/plugins/lua/mime_types.lua @@ -128,6 +128,7 @@ local settings = { inf = 4, its = 4, jnlp = 4, + ['library-ms'] = 4, lnk = 4, ksh = 4, mad = 4, @@ -179,6 +180,7 @@ local settings = { reg = 4, scf = 4, scr = 4, + ['search-ms'] = 4, shs = 4, theme = 4, url = 4, @@ -406,9 +408,9 @@ local function check_mime_type(task) local score2 = check_tables(ext2) -- Check if detected extension match real extension if detected_ext and detected_ext == ext then - check_extension(score1, nil) + check_extension(score1, nil) else - check_extension(score1, score2) + check_extension(score1, score2) end -- Check for archive cloaking like .zip.gz if settings['archive_extensions'][ext2] diff --git a/src/plugins/lua/multimap.lua b/src/plugins/lua/multimap.lua index b96c105b1..8bb62bef1 100644 --- a/src/plugins/lua/multimap.lua +++ b/src/plugins/lua/multimap.lua @@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
-]]-- +]] -- if confighelp then return @@ -34,6 +34,16 @@ local redis_params local fun = require "fun" local N = 'multimap' +-- SpamAssassin-like functionality +local sa_atoms = {} +local sa_scores = {} +local sa_meta_rules = {} +local sa_descriptions = {} + +-- Symbol state tracking for graceful map reloads +-- States: 'available', 'loading', 'orphaned' +local regexp_rules_symbol_states = {} + local multimap_grammar -- Parse result in form: <symbol>:<score>|<symbol>|<score> local function parse_multimap_value(parse_rule, p_ret) @@ -54,7 +64,7 @@ local function parse_multimap_value(parse_rule, p_ret) -- Matches: 55.97, -90.8, .9 number.decimal = (number.integer * -- Integer (number.fractional ^ -1)) + -- Fractional - (lpeg.S("+-") * number.fractional) -- Completely fractional number + (lpeg.S("+-") * number.fractional) -- Completely fractional number local sym_start = lpeg.R("az", "AZ") + lpeg.S("_") local sym_elt = sym_start + lpeg.R("09") @@ -100,6 +110,607 @@ local function parse_multimap_value(parse_rule, p_ret) return false, nil, 0.0, {} end +-- SpamAssassin-like line processing functions +local function split_sa_line(str) + local result = {} + if not str then + return result + end + + for token in string.gmatch(str, '%S+') do + table.insert(result, token) + end + + return result +end + +local function parse_sa_regexp(rule_symbol, re_expr) + -- Extract regexp and flags from /regexp/flags format + local re_str, flags = string.match(re_expr, '^/(.+)/([gimxsiu]*)$') + if not re_str then + re_str, flags = string.match(re_expr, '^m{(.+)}([gimxsiu]*)$') + end + if not re_str then + -- Try without delimiters + re_str = re_expr + flags = '' + end + + if flags and flags ~= '' then + re_str = '(?' .. flags .. ')' .. re_str + end + + local re = rspamd_regexp.create(re_str) + if not re then + rspamd_logger.errx(rspamd_config, 'cannot create regexp for %s: %s', rule_symbol, re_expr) + return nil + end + + return re +end + +local function words_to_sa_re(words, start) + return table.concat(fun.totable(fun.drop_n(start, words)), " ") +end + +-- Helper function to create SA rule callbacks +local function create_sa_atom_function(name, re, match_type, opts) + return function(task) + if not re then + rspamd_logger.errx(task, 're is missing for atom %s', name) + return 0 + end + + local function process_re_match(re_obj, tsk, re_type, header, strong) + local res = 0 + if type(jit) == 'table' then + res = tsk:process_regexp(re_obj, re_type, header, strong) + else + res = tsk:process_regexp(re_obj, re_type, header, strong) + end + return res + end + + local ret = 0 + + if match_type == 'header' then + ret = process_re_match(re, task, 'header', opts.header, opts.strong or false) + elseif match_type == 'body' then + ret = process_re_match(re, task, 'sabody') + elseif match_type == 'rawbody' then + ret = process_re_match(re, task, 'sarawbody') + elseif match_type == 'full' then + ret = process_re_match(re, task, 'body') + elseif match_type == 'uri' then + ret = process_re_match(re, task, 'url') + else + -- Default to body + ret = process_re_match(re, task, 'sabody') + end + + if opts and opts.negate then + -- Negate the result for !~ operators + ret = (ret > 0) and 0 or 1 + lua_util.debugm(N, task, 'SA atom %s negated result: %s', name, ret) + end + + lua_util.debugm(N, task, 'SA atom %s result: %s', name, ret) + return ret + end +end + +local function process_sa_line(rule, line) + line = lua_util.str_trim(line) + + if string.len(line) == 0 or string.sub(line, 1, 1) == '#' then + return + end + + -- Add debug 
logging + lua_util.debugm(N, rspamd_config, 'Processing SA line for rule %s: %s', rule.symbol, line) + + local words = split_sa_line(line) + if not words or #words == 0 then + lua_util.debugm(N, rspamd_config, 'Skipping empty or invalid line: %s', line) + return + end + + local rule_name = rule.symbol + local scope_name = rule.scope_name or rule_name + + -- All regexps for this SA-style rule are registered in a dedicated scope + -- This allows clean removal and replacement when the map is reloaded + + if words[1] == 'header' then + -- header SYMBOL Header =~ /regexp/flags + if #words >= 4 and (words[4] == '=~' or words[4] == '!~') then + local atom_name = words[2] + local header_name = words[3] + local re_expr = words_to_sa_re(words, 4) + + -- Skip =~ or !~ + re_expr = string.gsub(re_expr, '^[!=]~%s*', '') + + local re = parse_sa_regexp(atom_name, re_expr) + if re then + -- Register regexp with cache in specific scope + rspamd_config:register_regexp_scoped(scope_name, { + re = re, + type = 'header', + header = header_name, + pcre_only = false, + }) + + re:set_limit(0) -- No limit + re:set_max_hits(1) + + local negate = (words[4] == '!~') + sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'header', { + header = header_name, + strong = false, + negate = negate + }) + + -- Track atom state + regexp_rules_symbol_states[atom_name] = { + state = 'loading', + rule_name = rule_name, + type = 'atom' + } + + lua_util.debugm(N, rspamd_config, 'added SA header atom: %s for header %s (scope: %s)', + atom_name, header_name, scope_name) + end + end + elseif words[1] == 'body' then + -- body SYMBOL /regexp/flags + if #words >= 3 then + local atom_name = words[2] + local re_expr = words_to_sa_re(words, 2) + + local re = parse_sa_regexp(atom_name, re_expr) + if re then + rspamd_config:register_regexp_scoped(scope_name, { + re = re, + type = 'sabody', + pcre_only = false, + }) + + re:set_limit(0) + re:set_max_hits(1) + + sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'body', {}) + + -- Track atom state + regexp_rules_symbol_states[atom_name] = { + state = 'loading', + rule_name = rule_name, + type = 'atom' + } + + lua_util.debugm(N, rspamd_config, 'added SA body atom: %s (scope: %s)', atom_name, scope_name) + end + end + elseif words[1] == 'rawbody' then + -- rawbody SYMBOL /regexp/flags + if #words >= 3 then + local atom_name = words[2] + local re_expr = words_to_sa_re(words, 2) + + local re = parse_sa_regexp(atom_name, re_expr) + if re then + rspamd_config:register_regexp_scoped(scope_name, { + re = re, + type = 'sarawbody', + pcre_only = false, + }) + + re:set_limit(0) + re:set_max_hits(1) + + sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'rawbody', {}) + + -- Track atom state + regexp_rules_symbol_states[atom_name] = { + state = 'loading', + rule_name = rule_name, + type = 'atom' + } + + lua_util.debugm(N, rspamd_config, 'added SA rawbody atom: %s (scope: %s)', atom_name, scope_name) + end + end + elseif words[1] == 'uri' then + -- uri SYMBOL /regexp/flags + if #words >= 3 then + local atom_name = words[2] + local re_expr = words_to_sa_re(words, 2) + + local re = parse_sa_regexp(atom_name, re_expr) + if re then + rspamd_config:register_regexp_scoped(scope_name, { + re = re, + type = 'url', + pcre_only = false, + }) + + re:set_limit(0) + re:set_max_hits(1) + + sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'uri', {}) + + -- Track atom state + regexp_rules_symbol_states[atom_name] = { + state = 'loading', + rule_name = rule_name, + type = 'atom' + } + 
+ lua_util.debugm(N, rspamd_config, 'added SA uri atom: %s (scope: %s)', atom_name, scope_name) + end + end + elseif words[1] == 'full' then + -- full SYMBOL /regexp/flags + if #words >= 3 then + local atom_name = words[2] + local re_expr = words_to_sa_re(words, 2) + + local re = parse_sa_regexp(atom_name, re_expr) + if re then + rspamd_config:register_regexp_scoped(scope_name, { + re = re, + type = 'body', + pcre_only = false, + }) + + re:set_limit(0) + re:set_max_hits(1) + + sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'full', {}) + + -- Track atom state + regexp_rules_symbol_states[atom_name] = { + state = 'loading', + rule_name = rule_name, + type = 'atom' + } + + lua_util.debugm(N, rspamd_config, 'added SA full atom: %s (scope: %s)', atom_name, scope_name) + end + end + elseif words[1] == 'meta' then + -- meta SYMBOL expression + if #words >= 3 then + local meta_name = words[2] + local meta_expr = words_to_sa_re(words, 2) + + sa_meta_rules[meta_name] = { + symbol = meta_name, + expression = meta_expr, + rule_name = rule_name + } + + -- Track symbol state + regexp_rules_symbol_states[meta_name] = { + state = 'loading', + rule_name = rule_name, + type = 'meta' + } + + lua_util.debugm(N, rspamd_config, 'added SA meta rule: %s = %s', meta_name, meta_expr) + end + elseif words[1] == 'score' then + -- score SYMBOL value + if #words >= 3 then + local score_symbol = words[2] + local score_value = tonumber(words[3]) + + if score_value then + sa_scores[score_symbol] = score_value + lua_util.debugm(N, rspamd_config, 'added SA score: %s = %s', score_symbol, score_value) + end + end + elseif words[1] == 'describe' then + -- describe SYMBOL description text + if #words >= 3 then + local desc_symbol = words[2] + local desc_text = words_to_sa_re(words, 2) + + sa_descriptions[desc_symbol] = desc_text + lua_util.debugm(N, rspamd_config, 'added SA description: %s = %s', desc_symbol, desc_text) + end + end +end + +local function parse_sa_atom(str) + local atom = table.concat(fun.totable(fun.take_while(function(c) + if string.find(', \t()><+!|&\n', c, 1, true) then + return false + end + return true + end, fun.iter(str))), '') + + return atom +end + +-- Forward declaration for mutual recursion +local create_sa_meta_callback + +local function gen_sa_process_atom_cb(task, rule_name) + return function(atom) + -- Check symbol state first + local state_info = regexp_rules_symbol_states[atom] + if state_info then + if state_info.state == 'orphaned' or state_info.state == 'loading' then + -- Double-check by looking at scope loaded state + local scope_loaded = false + for _, rule in ipairs(rules) do + if rule.symbol == state_info.rule_name and rule.scope_name then + scope_loaded = rspamd_config:is_regexp_scope_loaded(rule.scope_name) + break + end + end + + if scope_loaded and (state_info.type == 'atom' and sa_atoms[atom]) then + -- Update state to available if scope is loaded and atom exists + state_info.state = 'available' + lua_util.debugm(N, task, 'regexp_rules atom %s was %s, but scope is loaded - marking as available', + atom, state_info.state) + else + lua_util.debugm(N, task, 'regexp_rules atom %s is %s, returning 0', atom, state_info.state) + return 0 + end + end + end + + local atom_cb = sa_atoms[atom] + + if atom_cb then + local res = atom_cb(task) + + -- Return result without logging each atom + return res + else + -- Check if this is a SA meta rule + local meta_rule = sa_meta_rules[atom] + if meta_rule then + local meta_cb = create_sa_meta_callback(meta_rule) + local res = 
meta_cb(task) + return res or 0 + end + + -- External atom - check if task has this symbol + if task:has_symbol(atom) then + return 1 + end + end + return 0 + end +end + +create_sa_meta_callback = function(meta_rule) + return function(task) + -- Check symbol state before execution + local state_info = regexp_rules_symbol_states[meta_rule.symbol] + if state_info then + if state_info.state == 'orphaned' or state_info.state == 'loading' then + -- Double-check by looking at scope loaded state + local scope_loaded = false + for _, rule in ipairs(rules) do + if rule.symbol == state_info.rule_name and rule.scope_name then + scope_loaded = rspamd_config:is_regexp_scope_loaded(rule.scope_name) + break + end + end + + if scope_loaded and sa_meta_rules[meta_rule.symbol] then + -- Update state to available if scope is loaded and meta rule exists + state_info.state = 'available' + lua_util.debugm(N, task, 'regexp_rules meta %s was %s, but scope is loaded - marking as available', + meta_rule.symbol, state_info.state) + else + lua_util.debugm(N, task, 'regexp_rules meta %s is %s, skipping execution', + meta_rule.symbol, state_info.state) + return 0 + end + end + end + + local cached = task:cache_get('sa_multimap_metas_processed') + + if not cached then + cached = {} + task:cache_set('sa_multimap_metas_processed', cached) + end + + local function exclude_sym_filter(sopt) + -- Exclude self and atoms starting with __ + return sopt ~= meta_rule.symbol + end + + local already_processed = cached[meta_rule.symbol] + + if not (already_processed and already_processed['default']) then + local expression = rspamd_expression.create(meta_rule.expression, + parse_sa_atom, + rspamd_config:get_mempool()) + if not expression then + rspamd_logger.errx(rspamd_config, 'Cannot parse SA meta expression: %s', meta_rule.expression) + return + end + + local function exec_symbol(cur_res) + local res, trace = expression:process_traced(gen_sa_process_atom_cb(task, meta_rule.rule_name)) + + if res > 0 then + local filtered_trace = fun.totable(fun.take_n(5, + fun.map(function(elt) + return elt:gsub('^__', '') + end, fun.filter(exclude_sym_filter, trace)))) + lua_util.debugm(N, task, 'SA meta %s matched with result: %s; trace %s; filtered trace %s', + meta_rule.symbol, res, trace, filtered_trace) + task:insert_result_named(cur_res, meta_rule.symbol, 1.0, filtered_trace) + end + + if not cached[meta_rule.symbol] then + cached[meta_rule.symbol] = {} + end + cached[meta_rule.symbol][cur_res] = res + + return res + end + + -- Invoke for all named results + local named_results = task:get_all_named_results() + for _, cur_res in ipairs(named_results) do + exec_symbol(cur_res) + end + else + -- We have cached the result + local res = already_processed['default'] or 0 + lua_util.debugm(N, task, 'cached SA meta result for %s: %s', meta_rule.symbol, res) + end + end +end + +-- Initialize SA meta rules after all atoms are processed +local function finalize_sa_rules() + lua_util.debugm(N, rspamd_config, 'Finalizing SA rules - processing %s meta rules', + fun.length(sa_meta_rules)) + + for meta_name, meta_rule in pairs(sa_meta_rules) do + local score = sa_scores[meta_name] or 1.0 + local description = sa_descriptions[meta_name] or ('multimap symbol ' .. 
meta_name) + + lua_util.debugm(N, rspamd_config, 'Registering SA meta rule %s (score: %s, expression: %s)', + meta_name, score, meta_rule.expression) + + local id = rspamd_config:register_symbol({ + name = meta_name, + weight = score, + callback = create_sa_meta_callback(meta_rule), + type = 'normal', + flags = 'one_shot', + augmentations = {}, + }) + + lua_util.debugm(N, rspamd_config, 'Successfully registered SA meta symbol %s with id %s (callback attached)', + meta_name, id) + + rspamd_config:set_metric_symbol({ + name = meta_name, + score = score, + description = description, + group = N, + }) + + -- Also register meta rule as an atom so it can be used in other meta expressions + sa_atoms[meta_name] = create_sa_meta_callback(meta_rule) + + -- Mark symbol as available + if regexp_rules_symbol_states[meta_name] then + regexp_rules_symbol_states[meta_name].state = 'available' + else + regexp_rules_symbol_states[meta_name] = { + state = 'available', + rule_name = meta_rule.rule_name, + type = 'meta' + } + end + + lua_util.debugm(N, rspamd_config, 'registered SA meta symbol: %s (score: %s)', + meta_name, score) + end + + -- Mark orphaned symbols - only check meta symbols (not atoms) since atoms are just expression parts + for symbol, state_info in pairs(regexp_rules_symbol_states) do + if state_info.type == 'meta' and state_info.state == 'available' and not sa_meta_rules[symbol] then + state_info.state = 'orphaned' + state_info.orphaned_at = os.time() + lua_util.debugm(N, rspamd_config, 'marked regexp_rules symbol %s as orphaned', symbol) + end + end + + lua_util.debugm(N, rspamd_config, 'SA rules finalization complete: registered %s meta rules with callbacks', + fun.length(sa_meta_rules)) +end + +-- Helper function to get regexp_rules symbol state statistics (only meta symbols, not atoms) +local function get_regexp_rules_symbol_stats() + local stats = { + available = 0, + loading = 0, + orphaned = 0, + total = 0 + } + + for _, state_info in pairs(regexp_rules_symbol_states) do + if state_info.type == 'meta' then + stats[state_info.state] = (stats[state_info.state] or 0) + 1 + stats.total = stats.total + 1 + end + end + + return stats +end + +-- Helper function to synchronize symbol states with loaded scopes +local function sync_regexp_rules_symbol_states() + lua_util.debugm(N, rspamd_config, 'Synchronizing regexp_rules symbol states with loaded scopes') + + -- Check each rule to see if its scope is loaded + for _, rule in ipairs(rules) do + if rule.type == 'regexp_rules' and rule.scope_name then + local scope_loaded = rspamd_config:is_regexp_scope_loaded(rule.scope_name) + + if scope_loaded then + -- Mark all meta symbols for this rule as available (atoms are just expression parts) + local updated_count = 0 + for _, state_info in pairs(regexp_rules_symbol_states) do + if state_info.type == 'meta' and state_info.rule_name == rule.symbol and state_info.state ~= 'available' then + state_info.state = 'available' + updated_count = updated_count + 1 + end + end + + lua_util.debugm(N, rspamd_config, 'Scope %s is loaded, marked %s symbols as available', + rule.scope_name, updated_count) + else + lua_util.debugm(N, rspamd_config, 'Scope %s is not loaded', rule.scope_name) + end + end + end + + local stats = get_regexp_rules_symbol_stats() + lua_util.debugm(N, rspamd_config, 'Symbol state stats after sync: available=%s, loading=%s, orphaned=%s, total=%s', + stats.available, stats.loading, stats.orphaned, stats.total) +end + +-- Optional cleanup function to remove old orphaned symbols (can be called 
periodically) +local function cleanup_orphaned_regexp_rules_symbols(max_age_seconds) + max_age_seconds = max_age_seconds or 3600 -- Default to 1 hour + local current_time = os.time() + local removed = 0 + + for symbol, state_info in pairs(regexp_rules_symbol_states) do + if state_info.type == 'meta' and state_info.state == 'orphaned' and state_info.orphaned_at then + if (current_time - state_info.orphaned_at) > max_age_seconds then + regexp_rules_symbol_states[symbol] = nil + -- Only meta rules should be cleaned up from sa_meta_rules + sa_meta_rules[symbol] = nil + removed = removed + 1 + lua_util.debugm(N, rspamd_config, 'cleaned up orphaned regexp_rules symbol: %s', symbol) + end + end + end + + if removed > 0 then + lua_util.debugm(N, rspamd_config, 'cleaned up %s orphaned regexp_rules symbols', removed) + end + + return removed +end + local value_types = { ip = { get_value = function(ip) @@ -531,7 +1142,7 @@ local function multimap_query_redis(key, task, value, callback) false, -- is write redis_map_cb, --callback cmd, -- command - srch -- arguments + srch -- arguments ) end @@ -631,7 +1242,6 @@ local function multimap_callback(task, rule) else task:insert_result(forced, symbol, score, tostring(opt)) end - else task:insert_result(forced, symbol, score) end @@ -671,7 +1281,6 @@ local function multimap_callback(task, rule) local fn = multimap_filters[r.type] if fn then - local filtered_value = fn(task, r.filter, value, r) lua_util.debugm(N, task, 'apply filter %s for rule %s: %s -> %s', r.filter, r.symbol, value, filtered_value) @@ -1097,6 +1706,12 @@ local function multimap_callback(task, rule) end end end, + regexp_rules = function() + -- For regexp_rules, the meta rules are registered as separate symbols + -- This is just a placeholder callback + lua_util.debugm(N, task, 'Regexp rules callback for %s - meta rules are registered as separate symbols', + rule.symbol) + end, } local rt = rule.type @@ -1184,7 +1799,8 @@ local function add_multimap_rule(key, newrule) country = true, mempool = true, selector = true, - combined = true + combined = true, + regexp_rules = true } if newrule['message_func'] then @@ -1275,6 +1891,145 @@ local function add_multimap_rule(key, newrule) else ret = true end + elseif newrule.type == 'regexp_rules' then + -- SpamAssassin-like map processing using callback map with line-by-line processing + local map_ucl = newrule.map + if type(map_ucl) == 'string' then + -- Convert string URL to UCL format + map_ucl = { + url = map_ucl, + description = newrule.description + } + elseif type(map_ucl) == 'table' and not map_ucl.url and not map_ucl.urls then + rspamd_logger.errx(rspamd_config, 'SA map %s has no URL defined', newrule.symbol) + return nil + end + + -- Set scope name for this regexp_rules map + local scope_name = newrule.symbol + newrule.scope_name = scope_name + + -- Remove existing scope if it exists to ensure clean state + if rspamd_config:find_regexp_scope(scope_name) then + lua_util.debugm(N, rspamd_config, 'removing existing regexp scope: %s', scope_name) + rspamd_config:remove_regexp_scope(scope_name) + end + + -- Mark the scope as unloaded during map processing + -- The scope will be created automatically when first regexp is added + local first_line_processed = false + + -- Create callback map with by_line processing + newrule.map_obj = rspamd_config:add_map({ + type = "callback", + url = map_ucl.url or map_ucl.urls or map_ucl, + description = newrule.description or 'SA-style multimap: ' .. 
newrule.symbol, + callback = function(pseudo_key, pseudo_value) + -- We have values being parsed as kv pairs, but they are not, so we concat them and use as a line + local line = pseudo_key .. ' ' .. pseudo_value + -- Add debug logging to see if callback is called + lua_util.debugm(N, rspamd_config, 'regexp_rules callback called for line: %s', line) + + -- Mark scope as unloaded on first line + if not first_line_processed then + first_line_processed = true + lua_util.debugm(N, rspamd_config, 'processing first line of regexp_rules map %s', newrule.symbol) + + -- Mark all existing symbols for this scope as loading + for symbol, state_info in pairs(regexp_rules_symbol_states) do + if state_info.rule_name == newrule.symbol then + state_info.state = 'loading' + lua_util.debugm(N, rspamd_config, 'marked regexp_rules symbol %s as loading for scope %s reload', + symbol, scope_name) + end + end + + -- Clear atoms and meta rules for this scope + local symbols_to_remove = {} + for symbol, _ in pairs(sa_meta_rules) do + if regexp_rules_symbol_states[symbol] and regexp_rules_symbol_states[symbol].rule_name == newrule.symbol then + table.insert(symbols_to_remove, symbol) + end + end + + for _, symbol in ipairs(symbols_to_remove) do + sa_atoms[symbol] = nil + sa_meta_rules[symbol] = nil + lua_util.debugm(N, rspamd_config, 'cleared regexp_rules symbol %s for scope %s reload', + symbol, scope_name) + end + + -- The scope will be created by process_sa_line when first regexp is added + -- We mark it as unloaded immediately after creation + rspamd_config:set_regexp_scope_loaded(scope_name, false) + lua_util.debugm(N, rspamd_config, 'marked regexp scope %s as unloaded during processing', scope_name) + end + process_sa_line(newrule, line) + end, + by_line = true, -- Process line by line + opaque_data = false, -- Use plain strings + }) + + -- Add on_load callback to mark scope as loaded when map processing is complete + if newrule.map_obj then + newrule.map_obj:on_load(function() + lua_util.debugm(N, rspamd_config, 'regexp_rules map %s loaded successfully', newrule.symbol) + + -- Mark all meta symbols for this scope as available (atoms are just expression parts) + for symbol, state_info in pairs(regexp_rules_symbol_states) do + if state_info.type == 'meta' and state_info.rule_name == newrule.symbol then + if state_info.state == 'loading' then + -- Check if this meta symbol still exists in the rules + if sa_meta_rules[symbol] then + state_info.state = 'available' + lua_util.debugm(N, rspamd_config, 'marked regexp_rules symbol %s as available after map load', symbol) + else + -- Symbol was removed in the new map + state_info.state = 'orphaned' + state_info.orphaned_at = os.time() + lua_util.debugm(N, rspamd_config, 'marked regexp_rules symbol %s as orphaned after map load', symbol) + end + end + end + end + + -- Mark scope as loaded when map processing is complete + -- Check if scope exists (it might not if map was empty) + if rspamd_config:find_regexp_scope(scope_name) then + rspamd_config:set_regexp_scope_loaded(scope_name, true) + lua_util.debugm(N, rspamd_config, 'marked regexp scope %s as loaded after map processing', scope_name) + + -- Trigger hyperscan compilation for this updated scope + newrule.map_obj:trigger_hyperscan_compilation() + lua_util.debugm(N, rspamd_config, 'triggered hyperscan compilation for scope %s after map loading', + scope_name) + else + lua_util.debugm(N, rspamd_config, 'regexp scope %s not created (empty map)', scope_name) + end + + -- Synchronize symbol states after map load to 
ensure all processes see correct states + sync_regexp_rules_symbol_states() + + -- Finalize SA rules immediately after map load + finalize_sa_rules() + + -- Promote symcache resort after dynamic symbol registration + rspamd_config:promote_symbols_cache_resort() + lua_util.debugm(N, rspamd_config, 'promoted symcache resort after loading SA rules from map %s', + newrule.symbol) + end) + end + + if newrule.map_obj then + -- Mark this rule as using SA functionality + newrule.uses_sa = true + lua_util.debugm(N, rspamd_config, 'created regexp_rules map %s with scope: %s', + newrule.symbol, scope_name) + ret = true + else + rspamd_logger.warnx(rspamd_config, 'Cannot add SA-style rule: map doesn\'t exists: %s', + newrule['map']) + end else if newrule['type'] == 'ip' then newrule.map_obj = lua_maps.map_add_from_ucl(newrule.map, 'radix', @@ -1282,7 +2037,7 @@ local function add_multimap_rule(key, newrule) if newrule.map_obj then ret = true else - rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %1', + rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %s', newrule['map']) end elseif newrule['type'] == 'received' then @@ -1303,7 +2058,7 @@ local function add_multimap_rule(key, newrule) if newrule.map_obj then ret = true else - rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %1', + rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %s', newrule['map']) end else @@ -1312,12 +2067,11 @@ local function add_multimap_rule(key, newrule) if newrule.map_obj then ret = true else - rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %1', + rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %s', newrule['map']) end end elseif known_generic_types[newrule.type] then - if newrule.filter == 'ip_addr' then newrule.map_obj = lua_maps.map_add_from_ucl(newrule.map, 'radix', newrule.description) @@ -1328,11 +2082,14 @@ local function add_multimap_rule(key, newrule) if newrule.map_obj then ret = true else - rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %1', + rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %s', newrule['map']) end elseif newrule['type'] == 'dnsbl' then ret = true + else + rspamd_logger.errx(rspamd_config, 'cannot add rule %s: invalid type %s', + key, newrule['type']) end end @@ -1390,6 +2147,29 @@ end local opts = rspamd_config:get_all_opt(N) if opts and type(opts) == 'table' then redis_params = rspamd_parse_redis_server(N) + + -- Initialize regexp_rules symbol states from existing sa_atoms and sa_meta_rules + -- This helps with module reload scenarios + for atom_name, _ in pairs(sa_atoms) do + if not regexp_rules_symbol_states[atom_name] then + regexp_rules_symbol_states[atom_name] = { + state = 'available', + rule_name = 'unknown', + type = 'atom' + } + end + end + + for meta_name, meta_rule in pairs(sa_meta_rules) do + if not regexp_rules_symbol_states[meta_name] then + regexp_rules_symbol_states[meta_name] = { + state = 'available', + rule_name = meta_rule.rule_name or 'unknown', + type = 'meta' + } + end + end + for k, m in pairs(opts) do if type(m) == 'table' and m['type'] then local rule = add_multimap_rule(k, m) @@ -1462,5 +2242,29 @@ if opts and type(opts) == 'table' then if #rules == 0 then lua_util.disable_module(N, "config") + else + -- Finalize SpamAssassin-like rules after all maps are processed + local has_sa_rules = false + for _, rule in ipairs(rules) do + if rule.uses_sa then + has_sa_rules = 
true + break + end + end + + if has_sa_rules then + -- Add a callback to synchronize symbol states in worker processes + rspamd_config:add_on_load(function(cfg, ev_base, worker) + -- Synchronize symbol states with loaded scopes in worker processes + if worker then + sync_regexp_rules_symbol_states() + end + end) + + -- Export utility functions for debugging/monitoring + rspamd_plugins.multimap = rspamd_plugins.multimap or {} + rspamd_plugins.multimap.get_regexp_rules_symbol_stats = get_regexp_rules_symbol_stats + rspamd_plugins.multimap.cleanup_orphaned_regexp_rules_symbols = cleanup_orphaned_regexp_rules_symbols + end end end diff --git a/src/plugins/lua/ratelimit.lua b/src/plugins/lua/ratelimit.lua index c20e61b17..d463658fa 100644 --- a/src/plugins/lua/ratelimit.lua +++ b/src/plugins/lua/ratelimit.lua @@ -373,7 +373,7 @@ local function ratelimit_cb(task) local function gen_check_cb(prefix, bucket, lim_name, lim_key) return function(err, data) if err then - rspamd_logger.errx('cannot check limit %s: %s %s', prefix, err, data) + rspamd_logger.errx('cannot check limit %s: %s', prefix, err) elseif type(data) == 'table' and data[1] then lua_util.debugm(N, task, "got reply for limit %s (%s / %s); %s burst, %s:%s dyn, %s leaked", @@ -416,7 +416,7 @@ local function ratelimit_cb(task) task:set_pre_result('soft reject', message_func(task, lim_name, prefix, bucket, lim_key), N) else - task:set_pre_result('soft reject', bucket.message) + task:set_pre_result('soft reject', bucket.message, N) end end end @@ -476,7 +476,7 @@ local function maybe_cleanup_pending(task) local bucket = v.bucket local function cleanup_cb(err, data) if err then - rspamd_logger.errx('cannot cleanup limit %s: %s %s', k, err, data) + rspamd_logger.errx('cannot cleanup limit %s: %s', k, err) else lua_util.debugm(N, task, 'cleaned pending bucked for %s: %s', k, data) end diff --git a/src/plugins/lua/rbl.lua b/src/plugins/lua/rbl.lua index af4a4cd15..b5b904b00 100644 --- a/src/plugins/lua/rbl.lua +++ b/src/plugins/lua/rbl.lua @@ -1077,7 +1077,7 @@ local function add_rbl(key, rbl, global_opts) rbl.selector_flatten) if not sel then - rspamd_logger.errx('invalid selector for rbl rule %s: %s', key, selector) + rspamd_logger.errx(rspamd_config, 'invalid selector for rbl rule %s: %s', key, selector) return false end diff --git a/src/plugins/lua/replies.lua b/src/plugins/lua/replies.lua index 08fb68bc7..2f0153d00 100644 --- a/src/plugins/lua/replies.lua +++ b/src/plugins/lua/replies.lua @@ -79,8 +79,8 @@ local function configure_redis_scripts(_, _) end ]] local set_script_zadd_global = lua_util.jinja_template(redis_script_zadd_global, - { max_global_size = settings.max_global_size }) - global_replies_set_script = lua_redis.add_redis_script(set_script_zadd_global, redis_params) + { max_global_size = settings.max_global_size }) + global_replies_set_script = lua_redis.add_redis_script(set_script_zadd_global, redis_params) local redis_script_zadd_local = [[ redis.call('ZREMRANGEBYRANK', KEYS[1], 0, -({= max_local_size =} + 1)) -- keeping size of local replies set @@ -102,7 +102,7 @@ local function configure_redis_scripts(_, _) end ]] local set_script_zadd_local = lua_util.jinja_template(redis_script_zadd_local, - { expire_time = settings.expire, max_local_size = settings.max_local_size }) + { expire_time = settings.expire, max_local_size = settings.max_local_size }) local_replies_set_script = lua_redis.add_redis_script(set_script_zadd_local, redis_params) end @@ -110,7 +110,7 @@ local function replies_check(task) local in_reply_to local 
function check_recipient(stored_rcpt) - local rcpts = task:get_recipients('mime') + local rcpts = task:get_recipients('smtp') lua_util.debugm(N, task, 'recipients: %s', rcpts) if rcpts then local filter_predicate = function(input_rcpt) @@ -119,7 +119,7 @@ local function replies_check(task) return real_rcpt_h == stored_rcpt end - if fun.any(filter_predicate, fun.map(function(rcpt) + if fun.all(filter_predicate, fun.map(function(rcpt) return rcpt.addr or '' end, rcpts)) then lua_util.debugm(N, task, 'reply to %s validated', in_reply_to) @@ -155,9 +155,9 @@ local function replies_check(task) end lua_redis.exec_redis_script(global_replies_set_script, - { task = task, is_write = true }, - zadd_global_set_cb, - { global_key }, params) + { task = task, is_write = true }, + zadd_global_set_cb, + { global_key }, params) end local function add_to_replies_set(recipients) @@ -173,7 +173,7 @@ local function replies_check(task) local params = recipients lua_util.debugm(N, task, - 'Adding recipients %s to sender %s local replies set', recipients, sender_key) + 'Adding recipients %s to sender %s local replies set', recipients, sender_key) local function zadd_cb(err, _) if err ~= nil then @@ -189,9 +189,9 @@ local function replies_check(task) table.insert(params, 1, task_time_str) lua_redis.exec_redis_script(local_replies_set_script, - { task = task, is_write = true }, - zadd_cb, - { sender_key }, params) + { task = task, is_write = true }, + zadd_cb, + { sender_key }, params) end local function redis_get_cb(err, data, addr) @@ -387,7 +387,7 @@ if opts then end lua_redis.register_prefix(settings.sender_prefix, N, - 'Prefix to identify replies sets') + 'Prefix to identify replies sets') local id = rspamd_config:register_symbol({ name = 'REPLIES_CHECK', diff --git a/src/plugins/lua/reputation.lua b/src/plugins/lua/reputation.lua index bd7d91932..eacaee064 100644 --- a/src/plugins/lua/reputation.lua +++ b/src/plugins/lua/reputation.lua @@ -200,7 +200,9 @@ local function dkim_reputation_filter(task, rule) end end - if sel_tld and requests[sel_tld] then + if rule.selector.config.exclusion_map and sel_tld and rule.selector.config.exclusion_map:get_key(sel_tld) then + lua_util.debugm(N, task, 'DKIM domain %s is excluded from reputation scoring', sel_tld) + elseif sel_tld and requests[sel_tld] then if requests[sel_tld] == 'a' then rep_accepted = rep_accepted + generic_reputation_calc(v, rule, 1.0, task) end @@ -243,9 +245,13 @@ local function dkim_reputation_idempotent(task, rule) if sc then for dom, res in pairs(requests) do - -- tld + "." + check_result, e.g. example.com.+ - reputation for valid sigs - local query = string.format('%s.%s', dom, res) - rule.backend.set_token(task, rule, nil, query, sc) + if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(dom) then + lua_util.debugm(N, task, 'DKIM domain %s is excluded from reputation update', dom) + else + -- tld + "." + check_result, e.g. 
example.com.+ - reputation for valid sigs + local query = string.format('%s.%s', dom, res) + rule.backend.set_token(task, rule, nil, query, sc) + end end end end @@ -277,6 +283,7 @@ local dkim_selector = { outbound = true, inbound = true, max_accept_adjustment = 2.0, -- How to adjust accepted DKIM score + exclusion_map = nil }, dependencies = { "DKIM_TRACE" }, filter = dkim_reputation_filter, -- used to get scores @@ -356,10 +363,14 @@ local function url_reputation_filter(task, rule) for i, res in pairs(results) do local req = requests[i] if req then - local url_score = generic_reputation_calc(res, rule, - req[2] / mhits, task) - lua_util.debugm(N, task, "score for url %s is %s, score=%s", req[1], url_score, score) - score = score + url_score + if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(req[1]) then + lua_util.debugm(N, task, 'URL domain %s is excluded from reputation scoring', req[1]) + else + local url_score = generic_reputation_calc(res, rule, + req[2] / mhits, task) + lua_util.debugm(N, task, "score for url %s is %s, score=%s", req[1], url_score, score) + score = score + url_score + end end end @@ -386,7 +397,11 @@ local function url_reputation_idempotent(task, rule) if sc then for _, tld in ipairs(requests) do - rule.backend.set_token(task, rule, nil, tld[1], sc) + if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(tld[1]) then + lua_util.debugm(N, task, 'URL domain %s is excluded from reputation update', tld[1]) + else + rule.backend.set_token(task, rule, nil, tld[1], sc) + end end end end @@ -401,6 +416,7 @@ local url_selector = { check_from = true, outbound = true, inbound = true, + exclusion_map = nil }, filter = url_reputation_filter, -- used to get scores idempotent = url_reputation_idempotent -- used to set scores @@ -439,6 +455,11 @@ local function ip_reputation_filter(task, rule) ip = ip:apply_mask(cfg.ipv6_mask) end + if cfg.exclusion_map and cfg.exclusion_map:get_key(ip) then + lua_util.debugm(N, task, 'IP %s is excluded from reputation scoring', tostring(ip)) + return + end + local pool = task:get_mempool() local asn = pool:get_variable("asn") local country = pool:get_variable("country") @@ -554,6 +575,11 @@ local function ip_reputation_idempotent(task, rule) ip = ip:apply_mask(cfg.ipv6_mask) end + if cfg.exclusion_map and cfg.exclusion_map:get_key(ip) then + lua_util.debugm(N, task, 'IP %s is excluded from reputation update', tostring(ip)) + return + end + local pool = task:get_mempool() local asn = pool:get_variable("asn") local country = pool:get_variable("country") @@ -600,6 +626,7 @@ local ip_selector = { inbound = true, ipv4_mask = 32, -- Mask bits for ipv4 ipv6_mask = 64, -- Mask bits for ipv6 + exclusion_map = nil }, --dependencies = {"ASN"}, -- ASN is a prefilter now... 
init = ip_reputation_init, @@ -621,6 +648,11 @@ local function spf_reputation_filter(task, rule) local cr = require "rspamd_cryptobox_hash" local hkey = cr.create(spf_record):base32():sub(1, 32) + if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(hkey) then + lua_util.debugm(N, task, 'SPF record %s is excluded from reputation scoring', hkey) + return + end + lua_util.debugm(N, task, 'check spf record %s -> %s', spf_record, hkey) local function tokens_cb(err, token, values) @@ -649,6 +681,11 @@ local function spf_reputation_idempotent(task, rule) local cr = require "rspamd_cryptobox_hash" local hkey = cr.create(spf_record):base32():sub(1, 32) + if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(hkey) then + lua_util.debugm(N, task, 'SPF record %s is excluded from reputation update', hkey) + return + end + lua_util.debugm(N, task, 'set spf record %s -> %s = %s', spf_record, hkey, sc) rule.backend.set_token(task, rule, nil, hkey, sc) @@ -663,6 +700,7 @@ local spf_selector = { max_score = nil, outbound = true, inbound = true, + exclusion_map = nil }, dependencies = { "R_SPF_ALLOW" }, filter = spf_reputation_filter, -- used to get scores @@ -697,6 +735,13 @@ local function generic_reputation_init(rule) 'Whitelisted selectors') end + if cfg.exclusion_map then + cfg.exclusion_map = lua_maps.map_add('reputation', + 'generic_exclusion', + 'set', + 'Excluded selectors') + end + return true end @@ -706,6 +751,10 @@ local function generic_reputation_filter(task, rule) local function tokens_cb(err, token, values) if values then + if cfg.exclusion_map and cfg.exclusion_map:get_key(token) then + lua_util.debugm(N, task, 'Generic selector token %s is excluded from reputation scoring', token) + return + end local score = generic_reputation_calc(values, rule, 1.0, task) if math.abs(score) > 1e-3 then @@ -742,14 +791,22 @@ local function generic_reputation_idempotent(task, rule) if sc then if type(selector_res) == 'table' then fun.each(function(e) - lua_util.debugm(N, task, 'set generic selector (%s) %s = %s', - rule['symbol'], e, sc) - rule.backend.set_token(task, rule, nil, e, sc) + if cfg.exclusion_map and cfg.exclusion_map:get_key(e) then + lua_util.debugm(N, task, 'Generic selector token %s is excluded from reputation update', e) + else + lua_util.debugm(N, task, 'set generic selector (%s) %s = %s', + rule['symbol'], e, sc) + rule.backend.set_token(task, rule, nil, e, sc) + end end, selector_res) else - lua_util.debugm(N, task, 'set generic selector (%s) %s = %s', - rule['symbol'], selector_res, sc) - rule.backend.set_token(task, rule, nil, selector_res, sc) + if cfg.exclusion_map and cfg.exclusion_map:get_key(selector_res) then + lua_util.debugm(N, task, 'Generic selector token %s is excluded from reputation update', selector_res) + else + lua_util.debugm(N, task, 'set generic selector (%s) %s = %s', + rule['symbol'], selector_res, sc) + rule.backend.set_token(task, rule, nil, selector_res, sc) + end end end end @@ -764,6 +821,7 @@ local generic_selector = { selector = ts.string, delimiter = ts.string, whitelist = ts.one_of(lua_maps.map_schema, lua_maps_exprs.schema):is_optional(), + exclusion_map = ts.one_of(lua_maps.map_schema, lua_maps_exprs.schema):is_optional() }, config = { lower_bound = 10, -- minimum number of messages to be scored @@ -773,7 +831,8 @@ local generic_selector = { inbound = true, selector = nil, delimiter = ':', - whitelist = nil + whitelist = nil, + exclusion_map = nil }, init = generic_reputation_init, filter = 
generic_reputation_filter, -- used to get scores @@ -1107,7 +1166,7 @@ local backends = { name = '1m', mult = 1.0, } - }, -- What buckets should be used, default 1h and 1month + }, -- What buckets should be used, default 1month }, init = reputation_redis_init, get_token = reputation_redis_get_token, @@ -1267,6 +1326,24 @@ local function parse_rule(name, tbl) end end + -- Parse exclusion_map for reputation exclusion lists + if rule.config.exclusion_map then + local map_type = 'set' -- Default to set for string-based selectors (dkim, url, spf, generic) + if sel_type == 'ip' or sel_type == 'sender' then + map_type = 'radix' -- Use radix for IP-based selectors + end + local map = lua_maps.map_add_from_ucl(rule.config.exclusion_map, + map_type, + sel_type .. ' reputation exclusion map') + if not map then + rspamd_logger.errx(rspamd_config, "cannot parse exclusion map config for %s: (%s)", + sel_type, + rule.config.exclusion_map) + return false + end + rule.config.exclusion_map = map + end + local symbol = rule.selector.config.symbol or name if tbl.symbol then symbol = tbl.symbol @@ -1387,4 +1464,4 @@ if opts['rules'] then end else lua_util.disable_module(N, "config") -end +end
\ No newline at end of file diff --git a/src/plugins/lua/settings.lua b/src/plugins/lua/settings.lua index 0f8e00723..c576e1325 100644 --- a/src/plugins/lua/settings.lua +++ b/src/plugins/lua/settings.lua @@ -1275,7 +1275,7 @@ local function gen_redis_callback(handler, id) ucl_err) else local obj = parser:get_object() - rspamd_logger.infox(task, "<%1> apply settings according to redis rule %2", + rspamd_logger.infox(task, "<%s> apply settings according to redis rule %s", task:get_message_id(), id) apply_settings(task, obj, nil, 'redis') break @@ -1283,7 +1283,7 @@ local function gen_redis_callback(handler, id) end end elseif err then - rspamd_logger.errx(task, 'Redis error: %1', err) + rspamd_logger.errx(task, 'Redis error: %s', err) end end @@ -1371,7 +1371,7 @@ if set_section and set_section[1] and type(set_section[1]) == "string" then opaque_data = true } if not rspamd_config:add_map(map_attrs) then - rspamd_logger.errx(rspamd_config, 'cannot load settings from %1', set_section) + rspamd_logger.errx(rspamd_config, 'cannot load settings from %s', set_section) end elseif set_section and type(set_section) == "table" then settings_map_pool = rspamd_mempool.create() diff --git a/src/plugins/lua/spamassassin.lua b/src/plugins/lua/spamassassin.lua index 3ea794495..c03481de2 100644 --- a/src/plugins/lua/spamassassin.lua +++ b/src/plugins/lua/spamassassin.lua @@ -221,7 +221,7 @@ local function handle_header_def(hline, cur_rule) }) cur_rule['function'] = function(task) if not re then - rspamd_logger.errx(task, 're is missing for rule %1', h) + rspamd_logger.errx(task, 're is missing for rule %s', h) return 0 end @@ -272,7 +272,7 @@ local function handle_header_def(hline, cur_rule) elseif func == 'case' then cur_param['strong'] = true else - rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2', + rspamd_logger.warnx(rspamd_config, 'Function %s is not supported in %s', func, cur_rule['symbol']) end end, fun.tail(args)) @@ -314,7 +314,7 @@ end local function freemail_search(input) local res = 0 local function trie_callback(number, pos) - lua_util.debugm(N, rspamd_config, 'Matched pattern %1 at pos %2', freemail_domains[number], pos) + lua_util.debugm(N, rspamd_config, 'Matched pattern %s at pos %s', freemail_domains[number], pos) res = res + 1 end @@ -369,7 +369,7 @@ local function gen_eval_rule(arg) end return 0 else - rspamd_logger.infox(rspamd_config, 'cannot create regexp %1', re) + rspamd_logger.infox(rspamd_config, 'cannot create regexp %s', re) return 0 end end @@ -461,7 +461,7 @@ local function gen_eval_rule(arg) end end else - rspamd_logger.infox(task, 'unimplemented mime check %1', arg) + rspamd_logger.infox(task, 'unimplemented mime check %s', arg) end end @@ -576,7 +576,7 @@ local function maybe_parse_sa_function(line) local elts = split(line, '[^:]+') arg = elts[2] - lua_util.debugm(N, rspamd_config, 'trying to parse SA function %1 with args %2', + lua_util.debugm(N, rspamd_config, 'trying to parse SA function %s with args %s', elts[1], elts[2]) local substitutions = { { '^exists:', @@ -612,7 +612,7 @@ local function maybe_parse_sa_function(line) end if not func then - rspamd_logger.errx(task, 'cannot find appropriate eval rule for function %1', + rspamd_logger.errx(task, 'cannot find appropriate eval rule for function %s', arg) else return func(task) @@ -685,7 +685,7 @@ local function process_sa_conf(f) end -- We have previous rule valid if not cur_rule['symbol'] then - rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', cur_rule) + 
rspamd_logger.errx(rspamd_config, 'bad rule definition: %s', cur_rule) end rules[cur_rule['symbol']] = cur_rule cur_rule = {} @@ -695,15 +695,15 @@ local function process_sa_conf(f) local function parse_score(words) if #words == 3 then -- score rule <x> - lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[3]) + lua_util.debugm(N, rspamd_config, 'found score for %s: %s', words[2], words[3]) return tonumber(words[3]) elseif #words == 6 then -- score rule <x1> <x2> <x3> <x4> -- we assume here that bayes and network are enabled and select <x4> - lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[6]) + lua_util.debugm(N, rspamd_config, 'found score for %s: %s', words[2], words[6]) return tonumber(words[6]) else - rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2]) + rspamd_logger.errx(rspamd_config, 'invalid score for %s', words[2]) end return 0 @@ -812,7 +812,7 @@ local function process_sa_conf(f) cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr']) if not cur_rule['re'] then - rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2", + rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%s' for %s", cur_rule['re_expr'], cur_rule['symbol']) else cur_rule['re']:set_max_hits(1) @@ -829,8 +829,8 @@ local function process_sa_conf(f) cur_rule['mime'] = false end - if cur_rule['re'] and cur_rule['symbol'] and - (cur_rule['header'] or cur_rule['function']) then + if cur_rule['re'] and cur_rule['symbol'] + and (cur_rule['header'] or cur_rule['function']) then valid_rule = true cur_rule['re']:set_max_hits(1) if cur_rule['header'] and cur_rule['ordinary'] then @@ -894,7 +894,7 @@ local function process_sa_conf(f) cur_rule['function'] = func valid_rule = true else - rspamd_logger.infox(rspamd_config, 'unknown function %1', args) + rspamd_logger.infox(rspamd_config, 'unknown function %s', args) end end elseif words[1] == "body" then @@ -931,7 +931,7 @@ local function process_sa_conf(f) cur_rule['function'] = func valid_rule = true else - rspamd_logger.infox(rspamd_config, 'unknown function %1', args) + rspamd_logger.infox(rspamd_config, 'unknown function %s', args) end end elseif words[1] == "rawbody" then @@ -968,7 +968,7 @@ local function process_sa_conf(f) cur_rule['function'] = func valid_rule = true else - rspamd_logger.infox(rspamd_config, 'unknown function %1', args) + rspamd_logger.infox(rspamd_config, 'unknown function %s', args) end end elseif words[1] == "full" then @@ -1006,7 +1006,7 @@ local function process_sa_conf(f) cur_rule['function'] = func valid_rule = true else - rspamd_logger.infox(rspamd_config, 'unknown function %1', args) + rspamd_logger.infox(rspamd_config, 'unknown function %s', args) end end elseif words[1] == "uri" then @@ -1265,11 +1265,11 @@ local function post_process() if res then local nre = rspamd_regexp.create(nexpr) if not nre then - rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %1', r) + rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %s', r) --rule['re'] = nil else local old_max_hits = rule['re']:get_max_hits() - lua_util.debugm(N, rspamd_config, 'replace %1 -> %2', r, nexpr) + lua_util.debugm(N, rspamd_config, 'replace %s -> %s', r, nexpr) rspamd_config:replace_regexp({ old_re = rule['re'], new_re = nre, @@ -1306,8 +1306,7 @@ local function post_process() end if not r['re'] then - rspamd_logger.errx(task, 're is missing for rule %1 (%2 header)', k, - h['header']) + rspamd_logger.errx(task, 're is missing for rule %s', h) return 0 end 
@@ -1434,7 +1433,7 @@ local function post_process() fun.each(function(k, r) local f = function(task) if not r['re'] then - rspamd_logger.errx(task, 're is missing for rule %1', k) + rspamd_logger.errx(task, 're is missing for rule %s', k) return 0 end @@ -1461,7 +1460,7 @@ local function post_process() fun.each(function(k, r) local f = function(task) if not r['re'] then - rspamd_logger.errx(task, 're is missing for rule %1', k) + rspamd_logger.errx(task, 're is missing for rule %s', k) return 0 end @@ -1486,7 +1485,7 @@ local function post_process() fun.each(function(k, r) local f = function(task) if not r['re'] then - rspamd_logger.errx(task, 're is missing for rule %1', k) + rspamd_logger.errx(task, 're is missing for rule %s', k) return 0 end @@ -1629,8 +1628,8 @@ local function post_process() rspamd_config:register_dependency(k, rspamd_symbol) external_deps[k][rspamd_symbol] = true lua_util.debugm(N, rspamd_config, - 'atom %1 is a direct foreign dependency, ' .. - 'register dependency for %2 on %3', + 'atom %s is a direct foreign dependency, ' .. + 'register dependency for %s on %s', a, k, rspamd_symbol) end end @@ -1659,8 +1658,8 @@ local function post_process() rspamd_config:register_dependency(k, dep) external_deps[k][dep] = true lua_util.debugm(N, rspamd_config, - 'atom %1 is an indirect foreign dependency, ' .. - 'register dependency for %2 on %3', + 'atom %s is an indirect foreign dependency, ' .. + 'register dependency for %s on %s', a, k, dep) nchanges = nchanges + 1 end @@ -1694,10 +1693,10 @@ local function post_process() -- Logging output if freemail_domains then freemail_trie = rspamd_trie.create(freemail_domains) - rspamd_logger.infox(rspamd_config, 'loaded %1 freemail domains definitions', + rspamd_logger.infox(rspamd_config, 'loaded %s freemail domains definitions', #freemail_domains) end - rspamd_logger.infox(rspamd_config, 'loaded %1 blacklist/whitelist elements', + rspamd_logger.infox(rspamd_config, 'loaded %s blacklist/whitelist elements', sa_lists['elts']) end @@ -1739,7 +1738,7 @@ if type(section) == "table" then process_sa_conf(f) has_rules = true else - rspamd_logger.errx(rspamd_config, "cannot open %1", matched) + rspamd_logger.errx(rspamd_config, "cannot open %s", matched) end end end @@ -1758,7 +1757,7 @@ if type(section) == "table" then process_sa_conf(f) has_rules = true else - rspamd_logger.errx(rspamd_config, "cannot open %1", matched) + rspamd_logger.errx(rspamd_config, "cannot open %s", matched) end end end diff --git a/src/plugins/lua/trie.lua b/src/plugins/lua/trie.lua index 7ba455289..7c7214b55 100644 --- a/src/plugins/lua/trie.lua +++ b/src/plugins/lua/trie.lua @@ -107,10 +107,10 @@ local function process_trie_file(symbol, cf) local file = io.open(cf['file']) if not file then - rspamd_logger.errx(rspamd_config, 'Cannot open trie file %1', cf['file']) + rspamd_logger.errx(rspamd_config, 'Cannot open trie file %s', cf['file']) else if cf['binary'] then - rspamd_logger.errx(rspamd_config, 'binary trie patterns are not implemented yet: %1', + rspamd_logger.errx(rspamd_config, 'binary trie patterns are not implemented yet: %s', cf['file']) else for line in file:lines() do @@ -123,7 +123,7 @@ end local function process_trie_conf(symbol, cf) if type(cf) ~= 'table' then - rspamd_logger.errx(rspamd_config, 'invalid value for symbol %1: "%2", expected table', + rspamd_logger.errx(rspamd_config, 'invalid value for symbol %s: "%s", expected table', symbol, cf) return end @@ -145,17 +145,17 @@ if opts then if #raw_patterns > 0 then raw_trie = 
rspamd_trie.create(raw_patterns) - rspamd_logger.infox(rspamd_config, 'registered raw search trie from %1 patterns', #raw_patterns) + rspamd_logger.infox(rspamd_config, 'registered raw search trie from %s patterns', #raw_patterns) end if #mime_patterns > 0 then mime_trie = rspamd_trie.create(mime_patterns) - rspamd_logger.infox(rspamd_config, 'registered mime search trie from %1 patterns', #mime_patterns) + rspamd_logger.infox(rspamd_config, 'registered mime search trie from %s patterns', #mime_patterns) end if #body_patterns > 0 then body_trie = rspamd_trie.create(body_patterns) - rspamd_logger.infox(rspamd_config, 'registered body search trie from %1 patterns', #body_patterns) + rspamd_logger.infox(rspamd_config, 'registered body search trie from %s patterns', #body_patterns) end local id = -1 diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index 414e1b3a6..df704b8ed 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -15,6 +15,10 @@ */ /***MODULE:regexp * rspamd module that implements different regexp rules + * + * For object-based configuration, you can specify: + * - `expression_flags`: array of strings or single string with expression flags + * - `"noopt"`: disable expression optimizations (useful for some SpamAssassin rules) */ @@ -32,6 +36,7 @@ struct regexp_module_item { struct rspamd_expression *expr; const char *symbol; struct ucl_lua_funcdata *lua_function; + int expression_flags; }; struct regexp_ctx { @@ -68,12 +73,53 @@ regexp_get_context(struct rspamd_config *cfg) } /* Process regexp expression */ +static int +parse_expression_flags(const ucl_object_t *flags_obj) +{ + int flags = 0; + const ucl_object_t *cur; + ucl_object_iter_t it = NULL; + const char *flag_name; + + if (!flags_obj) { + return 0; + } + + if (ucl_object_type(flags_obj) == UCL_ARRAY) { + /* Array of flag names */ + while ((cur = ucl_object_iterate(flags_obj, &it, true)) != NULL) { + if (ucl_object_type(cur) == UCL_STRING) { + flag_name = ucl_object_tostring(cur); + if (strcmp(flag_name, "noopt") == 0) { + flags |= RSPAMD_EXPRESSION_FLAG_NOOPT; + } + else { + msg_warn("unknown expression flag: %s", flag_name); + } + } + } + } + else if (ucl_object_type(flags_obj) == UCL_STRING) { + /* Single flag name */ + flag_name = ucl_object_tostring(flags_obj); + if (strcmp(flag_name, "noopt") == 0) { + flags |= RSPAMD_EXPRESSION_FLAG_NOOPT; + } + else { + msg_warn("unknown expression flag: %s", flag_name); + } + } + + return flags; +} + static gboolean read_regexp_expression(rspamd_mempool_t *pool, struct regexp_module_item *chain, const char *symbol, const char *line, - struct rspamd_mime_expr_ud *ud) + struct rspamd_mime_expr_ud *ud, + int expression_flags) { struct rspamd_expression *e = NULL; GError *err = NULL; @@ -90,6 +136,7 @@ read_regexp_expression(rspamd_mempool_t *pool, g_assert(e != NULL); chain->expr = e; + chain->expression_flags = expression_flags; return TRUE; } @@ -165,13 +212,14 @@ int regexp_module_config(struct rspamd_config *cfg, bool validate) sizeof(struct regexp_module_item)); cur_item->symbol = ucl_object_key(value); cur_item->magic = rspamd_regexp_cb_magic; + cur_item->expression_flags = 0; ud.conf_obj = NULL; ud.cfg = cfg; if (!read_regexp_expression(cfg->cfg_pool, cur_item, ucl_object_key(value), - ucl_obj_tostring(value), &ud)) { + ucl_obj_tostring(value), &ud, 0)) { if (validate) { return FALSE; } @@ -193,6 +241,7 @@ int regexp_module_config(struct rspamd_config *cfg, bool validate) cur_item->magic = rspamd_regexp_cb_magic; cur_item->symbol = ucl_object_key(value); 
cur_item->lua_function = ucl_object_toclosure(value); + cur_item->expression_flags = 0; rspamd_symcache_add_symbol(cfg->cache, cur_item->symbol, @@ -222,12 +271,17 @@ int regexp_module_config(struct rspamd_config *cfg, bool validate) sizeof(struct regexp_module_item)); cur_item->symbol = ucl_object_key(value); cur_item->magic = rspamd_regexp_cb_magic; + cur_item->expression_flags = 0; /* Will be overwritten with parsed flags */ ud.cfg = cfg; ud.conf_obj = value; + /* Look for expression_flags */ + const ucl_object_t *flags_obj = ucl_object_lookup(value, "expression_flags"); + int expr_flags = parse_expression_flags(flags_obj); + if (!read_regexp_expression(cfg->cfg_pool, cur_item, ucl_object_key(value), - ucl_obj_tostring(elt), &ud)) { + ucl_obj_tostring(elt), &ud, expr_flags)) { if (validate) { return FALSE; } @@ -253,6 +307,7 @@ int regexp_module_config(struct rspamd_config *cfg, bool validate) cur_item->magic = rspamd_regexp_cb_magic; cur_item->symbol = ucl_object_key(value); cur_item->lua_function = ucl_object_toclosure(value); + cur_item->expression_flags = 0; } if (cur_item && (is_lua || valid_expression)) { @@ -548,7 +603,7 @@ process_regexp_item(struct rspamd_task *task, else { /* Process expression */ if (item->expr) { - res = rspamd_process_expression(item->expr, 0, task); + res = rspamd_process_expression(item->expr, item->expression_flags, task); } else { msg_warn_task("FIXME: %s symbol is broken with new expressions", |
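
As a rough illustration of the object-based configuration that the new parse_expression_flags() handler above is meant to consume: only the expression_flags key and its "noopt" value are introduced by this patch; the symbol name, the re expression and the score below are hypothetical placeholders written in the module's usual object form.

    regexp {
      # hypothetical rule, shown only to illustrate the new option
      EXAMPLE_NOOPT_RULE {
        re = "Subject=/limited time offer/iH";   # assumed expression, not part of the patch
        score = 2.0;                             # assumed score, not part of the patch
        expression_flags = ["noopt"];            # array form: disables expression optimizations
        # expression_flags = "noopt";            # a single string is accepted as well
      }
    }

Likewise, the regexp_rules multimap type added earlier in this diff consumes SpamAssassin-style lines, one rule definition per line. A minimal sketch of such a map, using only the line formats that process_sa_line() recognises (header, body, rawbody, uri, full, meta, score, describe); all rule names, regexps and scores here are invented for the example:

    header   __EX_SUBJECT   Subject =~ /limited time offer/i
    body     __EX_BODY      /click here now/i
    meta     EXAMPLE_SA_META   __EX_SUBJECT && __EX_BODY
    score    EXAMPLE_SA_META   4.5
    describe EXAMPLE_SA_META   Example SA-style meta rule loaded from a regexp_rules map

A multimap rule pointing at such a map could then look roughly like this (the map URL is a placeholder):

    multimap {
      example_sa_rules {
        type = "regexp_rules";
        symbol = "EXAMPLE_SA_RULES";
        map = "https://maps.example.test/sa_rules.map";
        description = "SA-style rules loaded via multimap";
      }
    }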