Diffstat (limited to 'src/plugins/lua')
-rw-r--r--  src/plugins/lua/arc.lua             | 204
-rw-r--r--  src/plugins/lua/bayes_expiry.lua    | 182
-rw-r--r--  src/plugins/lua/contextal.lua       | 338
-rw-r--r--  src/plugins/lua/elastic.lua         | 172
-rw-r--r--  src/plugins/lua/fuzzy_collect.lua   |   2
-rw-r--r--  src/plugins/lua/gpt.lua             | 666
-rw-r--r--  src/plugins/lua/greylist.lua        |  23
-rw-r--r--  src/plugins/lua/hfilter.lua         |  14
-rw-r--r--  src/plugins/lua/history_redis.lua   |   6
-rw-r--r--  src/plugins/lua/known_senders.lua   |  62
-rw-r--r--  src/plugins/lua/milter_headers.lua  |   2
-rw-r--r--  src/plugins/lua/mime_types.lua      |   6
-rw-r--r--  src/plugins/lua/multimap.lua        | 827
-rw-r--r--  src/plugins/lua/phishing.lua        |   2
-rw-r--r--  src/plugins/lua/ratelimit.lua       |   6
-rw-r--r--  src/plugins/lua/rbl.lua             |  41
-rw-r--r--  src/plugins/lua/replies.lua         |  26
-rw-r--r--  src/plugins/lua/reputation.lua      | 113
-rw-r--r--  src/plugins/lua/settings.lua        | 156
-rw-r--r--  src/plugins/lua/spamassassin.lua    |  65
-rw-r--r--  src/plugins/lua/trie.lua            |  12
21 files changed, 2337 insertions, 588 deletions
diff --git a/src/plugins/lua/arc.lua b/src/plugins/lua/arc.lua
index fb5dd93e6..954583ed0 100644
--- a/src/plugins/lua/arc.lua
+++ b/src/plugins/lua/arc.lua
@@ -72,12 +72,13 @@ local settings = {
use_domain = 'header',
use_esld = true,
use_redis = false,
- key_prefix = 'arc_keys', -- default hash name
- reuse_auth_results = false, -- Reuse the existing authentication results
+ key_prefix = 'arc_keys', -- default hash name
+ reuse_auth_results = false, -- Reuse the existing authentication results
whitelisted_signers_map = nil, -- Trusted signers domains
- adjust_dmarc = true, -- Adjust DMARC rejected policy for trusted forwarders
- allowed_ids = nil, -- Allowed settings id
- forbidden_ids = nil, -- Banned settings id
+ whitelist = nil, -- Domains with broken ARC implementations to trust despite validation failures
+ adjust_dmarc = true, -- Adjust DMARC rejected policy for trusted forwarders
+ allowed_ids = nil, -- Allowed settings id
+ forbidden_ids = nil, -- Banned settings id
}
-- To match normal AR
@@ -86,15 +87,15 @@ local ar_settings = lua_auth_results.default_settings
local function parse_arc_header(hdr, target, is_aar)
-- Split elements by ';' and trim spaces
local arr = fun.totable(fun.map(
- function(val)
- return fun.totable(fun.map(lua_util.rspamd_str_trim,
- fun.filter(function(v)
- return v and #v > 0
- end,
- lua_util.rspamd_str_split(val.decoded, ';')
- )
- ))
- end, hdr
+ function(val)
+ return fun.totable(fun.map(lua_util.rspamd_str_trim,
+ fun.filter(function(v)
+ return v and #v > 0
+ end,
+ lua_util.rspamd_str_split(val.decoded, ';')
+ )
+ ))
+ end, hdr
))
-- v[1] is the key and v[2] is the value
@@ -115,11 +116,11 @@ local function parse_arc_header(hdr, target, is_aar)
if not is_aar then
-- For normal ARC headers we split by kv pair, like k=v
fun.each(function(v)
- fill_arc_header_table(v, target[i])
- end,
- fun.map(function(elt)
- return lua_util.rspamd_str_split(elt, '=')
- end, elts)
+ fill_arc_header_table(v, target[i])
+ end,
+ fun.map(function(elt)
+ return lua_util.rspamd_str_split(elt, '=')
+ end, elts)
)
else
-- For AAR we check special case of i=%d and pass everything else to
@@ -147,7 +148,7 @@ local function parse_arc_header(hdr, target, is_aar)
-- sort by i= attribute
table.sort(target, function(a, b)
- return (a.i or 0) < (b.i or 0)
+ return (tonumber(a.i) or 0) < (tonumber(b.i) or 0)
end)
end
@@ -156,14 +157,14 @@ local function arc_validate_seals(task, seals, sigs, seal_headers, sig_headers)
for i = 1, #seals do
if (sigs[i].i or 0) ~= i then
fail_reason = string.format('bad i for signature: %d, expected %d; d=%s',
- sigs[i].i, i, sigs[i].d)
+ sigs[i].i, i, sigs[i].d)
rspamd_logger.infox(task, fail_reason)
task:insert_result(arc_symbols['invalid'], 1.0, fail_reason)
return false, fail_reason
end
if (seals[i].i or 0) ~= i then
fail_reason = string.format('bad i for seal: %d, expected %d; d=%s',
- seals[i].i, i, seals[i].d)
+ seals[i].i, i, seals[i].d)
rspamd_logger.infox(task, fail_reason)
task:insert_result(arc_symbols['invalid'], 1.0, fail_reason)
return false, fail_reason
@@ -207,7 +208,7 @@ local function arc_callback(task)
if #arc_sig_headers ~= #arc_seal_headers then
-- We mandate that count of seals is equal to count of signatures
rspamd_logger.infox(task, 'number of seals (%s) is not equal to number of signatures (%s)',
- #arc_seal_headers, #arc_sig_headers)
+ #arc_seal_headers, #arc_sig_headers)
task:insert_result(arc_symbols['invalid'], 1.0, 'invalid count of seals and signatures')
return
end
@@ -249,7 +250,7 @@ local function arc_callback(task)
-- Now check sanity of what we have
local valid, validation_error = arc_validate_seals(task, cbdata.seals, cbdata.sigs,
- arc_seal_headers, arc_sig_headers)
+ arc_seal_headers, arc_sig_headers)
if not valid then
task:cache_set('arc-failure', validation_error)
return
@@ -267,12 +268,20 @@ local function arc_callback(task)
local function gen_arc_seal_cb(index, sig)
return function(_, res, err, domain)
lua_util.debugm(N, task, 'checked arc seal: %s(%s), %s processed',
- res, err, index)
+ res, err, index)
if not res then
- cbdata.res = 'fail'
- if err and domain then
- table.insert(cbdata.errors, string.format('sig:%s:%s', domain, err))
+ -- Check if this domain is whitelisted for broken ARC implementations
+ if settings.whitelist and domain and settings.whitelist:get_key(domain) then
+ rspamd_logger.infox(task, 'ARC seal validation failed for whitelisted domain %s, treating as valid: %s',
+ domain, err)
+ lua_util.debugm(N, task, 'whitelisted domain %s ARC seal failure ignored', domain)
+ res = true -- Treat as valid to continue the chain
+ else
+ cbdata.res = 'fail'
+ if err and domain then
+ table.insert(cbdata.errors, string.format('sig:%s:%s', domain, err))
+ end
end
end
@@ -283,7 +292,7 @@ local function arc_callback(task)
local cur_aar = cbdata.ars[index]
if not cur_aar then
rspamd_logger.warnx(task, "cannot find Arc-Authentication-Results for trusted " ..
- "forwarder %s on i=%s", domain, cbdata.index)
+ "forwarder %s on i=%s", domain, cbdata.index)
else
task:cache_set(AR_TRUSTED_CACHE_KEY, cur_aar)
local seen_dmarc
@@ -309,20 +318,20 @@ local function arc_callback(task)
end
end
task:insert_result(arc_symbols.trusted_allow, mult,
- string.format('%s:s=%s:i=%d', domain, sig.s, index))
+ string.format('%s:s=%s:i=%d', domain, sig.s, index))
end
end
if index == #arc_sig_headers then
if cbdata.res == 'success' then
local arc_allow_result = string.format('%s:s=%s:i=%d',
- domain, sig.s, index)
+ domain, sig.s, index)
task:insert_result(arc_symbols.allow, 1.0, arc_allow_result)
task:cache_set('arc-allow', arc_allow_result)
else
task:insert_result(arc_symbols.reject, 1.0,
- rspamd_logger.slog('seal check failed: %s, %s', cbdata.res,
- cbdata.errors))
+ rspamd_logger.slog('seal check failed: %s, %s', cbdata.res,
+ cbdata.errors))
end
end
end
@@ -330,12 +339,20 @@ local function arc_callback(task)
local function arc_signature_cb(_, res, err, domain)
lua_util.debugm(N, task, 'checked arc signature %s: %s(%s)',
- domain, res, err)
+ domain, res, err)
if not res then
- cbdata.res = 'fail'
- if err and domain then
- table.insert(cbdata.errors, string.format('sig:%s:%s', domain, err))
+ -- Check if this domain is whitelisted for broken ARC implementations
+ if settings.whitelist and domain and settings.whitelist:get_key(domain) then
+ rspamd_logger.infox(task, 'ARC signature validation failed for whitelisted domain %s, treating as valid: %s',
+ domain, err)
+ lua_util.debugm(N, task, 'whitelisted domain %s ARC signature failure ignored', domain)
+ res = true -- Treat as valid to continue the chain
+ else
+ cbdata.res = 'fail'
+ if err and domain then
+ table.insert(cbdata.errors, string.format('sig:%s:%s', domain, err))
+ end
end
end
if cbdata.res == 'success' then
@@ -343,17 +360,24 @@ local function arc_callback(task)
for i, sig in ipairs(cbdata.seals) do
local ret, lerr = dkim_verify(task, sig.header, gen_arc_seal_cb(i, sig), 'arc-seal')
if not ret then
- cbdata.res = 'fail'
- table.insert(cbdata.errors, string.format('seal:%s:s=%s:i=%s:%s',
+ -- Check if this domain is whitelisted for broken ARC implementations
+ if settings.whitelist and sig.d and settings.whitelist:get_key(sig.d) then
+ rspamd_logger.infox(task, 'ARC seal dkim_verify failed for whitelisted domain %s, treating as valid: %s',
+ sig.d, lerr)
+ lua_util.debugm(N, task, 'whitelisted domain %s ARC seal dkim_verify failure ignored', sig.d)
+ else
+ cbdata.res = 'fail'
+ table.insert(cbdata.errors, string.format('seal:%s:s=%s:i=%s:%s',
sig.d or '', sig.s or '', sig.i or '', lerr))
- lua_util.debugm(N, task, 'checked arc seal %s: %s(%s), %s processed',
+ lua_util.debugm(N, task, 'checked arc seal %s: %s(%s), %s processed',
sig.d, ret, lerr, i)
+ end
end
end
else
task:insert_result(arc_symbols['reject'], 1.0,
- rspamd_logger.slog('signature check failed: %s, %s', cbdata.res,
- cbdata.errors))
+ rspamd_logger.slog('signature check failed: %s, %s', cbdata.res,
+ cbdata.errors))
end
end
@@ -397,25 +421,33 @@ local function arc_callback(task)
is "fail" and the algorithm stops here.
9. If the algorithm reaches this step, then the Chain Validation
Status is "pass", and the algorithm is complete.
- ]]--
+ ]] --
local processed = 0
local sig = cbdata.sigs[#cbdata.sigs] -- last AMS
local ret, err = dkim_verify(task, sig.header, arc_signature_cb, 'arc-sign')
if not ret then
- cbdata.res = 'fail'
- table.insert(cbdata.errors, string.format('sig:%s:%s', sig.d or '', err))
+ -- Check if this domain is whitelisted for broken ARC implementations
+ if settings.whitelist and sig.d and settings.whitelist:get_key(sig.d) then
+ rspamd_logger.infox(task, 'ARC signature dkim_verify failed for whitelisted domain %s, treating as valid: %s',
+ sig.d, err)
+ lua_util.debugm(N, task, 'whitelisted domain %s ARC signature dkim_verify failure ignored', sig.d)
+ processed = processed + 1
+ else
+ cbdata.res = 'fail'
+ table.insert(cbdata.errors, string.format('sig:%s:%s', sig.d or '', err))
+ end
else
processed = processed + 1
lua_util.debugm(N, task, 'processed arc signature %s[%s]: %s(%s), %s total',
- sig.d, sig.i, ret, err, #cbdata.seals)
+ sig.d, sig.i, ret, err, #cbdata.seals)
end
if processed == 0 then
task:insert_result(arc_symbols['reject'], 1.0,
- rspamd_logger.slog('cannot verify %s of %s signatures: %s',
- #arc_sig_headers - processed, #arc_sig_headers, cbdata.errors))
+ rspamd_logger.slog('cannot verify %s of %s signatures: %s',
+ #arc_sig_headers - processed, #arc_sig_headers, cbdata.errors))
end
end
@@ -538,13 +570,13 @@ local function arc_sign_seal(task, params, header)
for i = 1, #arc_seals, 1 do
if arc_auth_results[i] then
local s = dkim_canonicalize('ARC-Authentication-Results',
- arc_auth_results[i].raw_header)
+ arc_auth_results[i].raw_header)
sha_ctx:update(s)
lua_util.debugm(N, task, 'update signature with header: %s', s)
end
if arc_sigs[i] then
local s = dkim_canonicalize('ARC-Message-Signature',
- arc_sigs[i].raw_header)
+ arc_sigs[i].raw_header)
sha_ctx:update(s)
lua_util.debugm(N, task, 'update signature with header: %s', s)
end
@@ -557,16 +589,16 @@ local function arc_sign_seal(task, params, header)
end
header = lua_util.fold_header(task,
- 'ARC-Message-Signature',
- header)
+ 'ARC-Message-Signature',
+ header)
cur_auth_results = string.format('i=%d; %s', cur_idx, cur_auth_results)
cur_auth_results = lua_util.fold_header(task,
- 'ARC-Authentication-Results',
- cur_auth_results, ';')
+ 'ARC-Authentication-Results',
+ cur_auth_results, ';')
local s = dkim_canonicalize('ARC-Authentication-Results',
- cur_auth_results)
+ cur_auth_results)
sha_ctx:update(s)
lua_util.debugm(N, task, 'update signature with header: %s', s)
s = dkim_canonicalize('ARC-Message-Signature', header)
@@ -574,10 +606,10 @@ local function arc_sign_seal(task, params, header)
lua_util.debugm(N, task, 'update signature with header: %s', s)
local cur_arc_seal = string.format('i=%d; s=%s; d=%s; t=%d; a=rsa-sha256; cv=%s; b=',
- cur_idx,
- params.selector,
- params.domain,
- math.floor(rspamd_util.get_time()), params.arc_cv)
+ cur_idx,
+ params.selector,
+ params.domain,
+ math.floor(rspamd_util.get_time()), params.arc_cv)
s = string.format('%s:%s', 'arc-seal', cur_arc_seal)
sha_ctx:update(s)
lua_util.debugm(N, task, 'initial update signature with header: %s', s)
@@ -591,20 +623,23 @@ local function arc_sign_seal(task, params, header)
local sig = rspamd_rsa.sign_memory(privkey, sha_ctx:bin())
cur_arc_seal = string.format('%s%s', cur_arc_seal,
- sig:base64(70, nl_type))
+ sig:base64(70, nl_type))
lua_mime.modify_headers(task, {
add = {
['ARC-Authentication-Results'] = { order = 1, value = cur_auth_results },
['ARC-Message-Signature'] = { order = 1, value = header },
- ['ARC-Seal'] = { order = 1, value = lua_util.fold_header(task,
- 'ARC-Seal', cur_arc_seal) }
+ ['ARC-Seal'] = {
+ order = 1,
+ value = lua_util.fold_header(task,
+ 'ARC-Seal', cur_arc_seal)
+ }
},
-- RFC requires a strict order for these headers to be inserted
order = { 'ARC-Authentication-Results', 'ARC-Message-Signature', 'ARC-Seal' },
})
task:insert_result(settings.sign_symbol, 1.0,
- string.format('%s:s=%s:i=%d', params.domain, params.selector, cur_idx))
+ string.format('%s:s=%s:i=%d', params.domain, params.selector, cur_idx))
end
local function prepare_arc_selector(task, sel)
@@ -668,7 +703,6 @@ local function prepare_arc_selector(task, sel)
else
default_arc_cv()
end
-
end
return true
@@ -695,19 +729,18 @@ local function do_sign(task, sign_params)
sign_params.pubkey = results[1]
sign_params.strict_pubkey_check = not settings.allow_pubkey_mismatch
elseif not settings.allow_pubkey_mismatch then
- rspamd_logger.errx('public key for domain %s/%s is not found: %s, skip signing',
- sign_params.domain, sign_params.selector, err)
+ rspamd_logger.errx(task, 'public key for domain %s/%s is not found: %s, skip signing',
+ sign_params.domain, sign_params.selector, err)
return
else
- rspamd_logger.infox('public key for domain %s/%s is not found: %s',
- sign_params.domain, sign_params.selector, err)
+ rspamd_logger.infox(task, 'public key for domain %s/%s is not found: %s',
+ sign_params.domain, sign_params.selector, err)
end
local dret, hdr = dkim_sign(task, sign_params)
if dret then
arc_sign_seal(task, sign_params, hdr)
end
-
end,
forced = true
})
@@ -768,6 +801,31 @@ end
dkim_sign_tools.process_signing_settings(N, settings, opts)
+-- Process ARC-specific maps that aren't handled by dkim_sign_tools
+local lua_maps = require "lua_maps"
+
+if opts.whitelisted_signers_map then
+ settings.whitelisted_signers_map = lua_maps.map_add_from_ucl(opts.whitelisted_signers_map, 'set',
+ 'ARC trusted signers domains')
+ if not settings.whitelisted_signers_map then
+ rspamd_logger.errx(rspamd_config, 'cannot load whitelisted_signers_map')
+ settings.whitelisted_signers_map = nil
+ else
+ rspamd_logger.infox(rspamd_config, 'loaded ARC whitelisted signers map')
+ end
+end
+
+if opts.whitelist then
+ settings.whitelist = lua_maps.map_add_from_ucl(opts.whitelist, 'set',
+ 'ARC domains with broken implementations')
+ if not settings.whitelist then
+ rspamd_logger.errx(rspamd_config, 'cannot load ARC whitelist map')
+ settings.whitelist = nil
+ else
+ rspamd_logger.infox(rspamd_config, 'loaded ARC whitelist map')
+ end
+end
+
if not dkim_sign_tools.validate_signing_settings(settings) then
rspamd_logger.infox(rspamd_config, 'mandatory parameters missing, disable arc signing')
return
@@ -780,7 +838,7 @@ if ar_opts and ar_opts.routines then
if routines['authentication-results'] then
ar_settings = lua_util.override_defaults(ar_settings,
- routines['authentication-results'])
+ routines['authentication-results'])
end
end
@@ -789,7 +847,7 @@ if settings.use_redis then
if not redis_params then
rspamd_logger.errx(rspamd_config, 'no servers are specified, ' ..
- 'but module is configured to load keys from redis, disable arc signing')
+ 'but module is configured to load keys from redis, disable arc signing')
return
end
@@ -845,9 +903,9 @@ if settings.adjust_dmarc and settings.whitelisted_signers_map then
local dmarc_fwd = ar.dmarc
if dmarc_fwd == 'pass' then
rspamd_logger.infox(task, "adjust dmarc reject score as trusted forwarder "
- .. "proved DMARC validity for %s", ar['header.from'])
+ .. "proved DMARC validity for %s", ar['header.from'])
task:adjust_result(sym_to_adjust, 0.1,
- 'ARC trusted')
+ 'ARC trusted')
end
end
end
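
The arc.lua changes above add a `whitelist` option: a lua_maps 'set' of domains whose ARC implementations are known to be broken. When seal or signature verification fails for a listed signing domain, the failure is logged and the result is treated as valid so the chain check can continue. A minimal sketch of the lookup pattern, assuming the rspamd Lua environment; the map file path is illustrative and not part of the diff:

    local lua_maps = require "lua_maps"

    -- hypothetical map source; any lua_maps-compatible definition works
    local whitelist = lua_maps.map_add_from_ucl(
      '/etc/rspamd/local.d/arc_broken_domains.map', 'set',
      'ARC domains with broken implementations')

    -- mirrors the check added in gen_arc_seal_cb()/arc_signature_cb():
    -- a verification failure for a listed signing domain is tolerated
    local function seal_failure_is_tolerated(domain)
      return (whitelist and domain and whitelist:get_key(domain)) and true or false
    end
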
diff --git a/src/plugins/lua/bayes_expiry.lua b/src/plugins/lua/bayes_expiry.lua
index 44ff9dafa..0d78f2272 100644
--- a/src/plugins/lua/bayes_expiry.lua
+++ b/src/plugins/lua/bayes_expiry.lua
@@ -41,32 +41,38 @@ local template = {}
local function check_redis_classifier(cls, cfg)
-- Skip old classifiers
if cls.new_schema then
- local symbol_spam, symbol_ham
+ local class_symbols = {}
+ local class_labels = {}
local expiry = (cls.expiry or cls.expire)
if type(expiry) == 'table' then
expiry = expiry[1]
end
- -- Load symbols from statfiles
+ -- Extract class_labels mapping from classifier config
+ if cls.class_labels then
+ class_labels = cls.class_labels
+ end
+ -- Load symbols from statfiles for multi-class support
local function check_statfile_table(tbl, def_sym)
local symbol = tbl.symbol or def_sym
-
- local spam
- if tbl.spam then
- spam = tbl.spam
- else
- if string.match(symbol:upper(), 'SPAM') then
- spam = true
+ local class_name = tbl.class
+
+ -- Handle legacy spam/ham detection for backward compatibility
+ if not class_name then
+ if tbl.spam ~= nil then
+ class_name = tbl.spam and 'spam' or 'ham'
+ elseif string.match(tostring(symbol):upper(), 'SPAM') then
+ class_name = 'spam'
+ elseif string.match(tostring(symbol):upper(), 'HAM') then
+ class_name = 'ham'
else
- spam = false
+ class_name = def_sym
end
end
- if spam then
- symbol_spam = symbol
- else
- symbol_ham = symbol
+ if class_name then
+ class_symbols[class_name] = symbol
end
end
@@ -87,10 +93,9 @@ local function check_redis_classifier(cls, cfg)
end
end
- if not symbol_spam or not symbol_ham or type(expiry) ~= 'number' then
+ if next(class_symbols) == nil or type(expiry) ~= 'number' then
logger.debugm(N, rspamd_config,
- 'disable expiry for classifier %s: no expiry %s',
- symbol_spam, cls)
+ 'disable expiry for classifier: no class symbols or expiry configured')
return
end
-- Now try to load redis_params if needed
@@ -108,17 +113,16 @@ local function check_redis_classifier(cls, cfg)
end
if redis_params['read_only'] then
- logger.infox(rspamd_config, 'disable expiry for classifier %s: read only redis configuration',
- symbol_spam)
+ logger.infox(rspamd_config, 'disable expiry for classifier: read only redis configuration')
return
end
- logger.debugm(N, rspamd_config, "enabled expiry for %s/%s -> %s expiry",
- symbol_spam, symbol_ham, expiry)
+ logger.debugm(N, rspamd_config, "enabled expiry for classes %s -> %s expiry",
+ table.concat(lutil.keys(class_symbols), ', '), expiry)
table.insert(settings.classifiers, {
- symbol_spam = symbol_spam,
- symbol_ham = symbol_ham,
+ class_symbols = class_symbols,
+ class_labels = class_labels,
redis_params = redis_params,
expiry = expiry
})
@@ -249,12 +253,11 @@ local expiry_script = [[
local keys = ret[2]
local tokens = {}
- -- Tokens occurrences distribution counters
+ -- Dynamic occurrence tracking for all classes
local occur = {
- ham = {},
- spam = {},
total = {}
}
+ local classes_found = {}
-- Expiry step statistics counters
local nelts, extended, discriminated, sum, sum_squares, common, significant,
@@ -264,24 +267,44 @@ local expiry_script = [[
for _,key in ipairs(keys) do
local t = redis.call('TYPE', key)["ok"]
if t == 'hash' then
- local values = redis.call('HMGET', key, 'H', 'S')
- local ham = tonumber(values[1]) or 0
- local spam = tonumber(values[2]) or 0
+ -- Get all hash fields to support multi-class
+ local hash_data = redis.call('HGETALL', key)
+ local class_counts = {}
+ local total = 0
local ttl = redis.call('TTL', key)
+
+ -- Parse hash data into class counts
+ for i = 1, #hash_data, 2 do
+ local class_label = hash_data[i]
+ local count = tonumber(hash_data[i + 1]) or 0
+ class_counts[class_label] = count
+ total = total + count
+
+ -- Track classes we've seen
+ if not classes_found[class_label] then
+ classes_found[class_label] = true
+ occur[class_label] = {}
+ end
+ end
+
tokens[key] = {
- ham,
- spam,
- ttl
+ class_counts = class_counts,
+ total = total,
+ ttl = ttl
}
- local total = spam + ham
+
sum = sum + total
sum_squares = sum_squares + total * total
nelts = nelts + 1
- for k,v in pairs({['ham']=ham, ['spam']=spam, ['total']=total}) do
- if tonumber(v) > 19 then v = 20 end
- occur[k][v] = occur[k][v] and occur[k][v] + 1 or 1
+ -- Update occurrence counters for all classes and total
+ for class_label, count in pairs(class_counts) do
+ local bucket = count > 19 and 20 or count
+ occur[class_label][bucket] = (occur[class_label][bucket] or 0) + 1
end
+
+ local total_bucket = total > 19 and 20 or total
+ occur.total[total_bucket] = (occur.total[total_bucket] or 0) + 1
end
end
@@ -293,9 +316,10 @@ local expiry_script = [[
end
for key,token in pairs(tokens) do
- local ham, spam, ttl = token[1], token[2], tonumber(token[3])
+ local class_counts = token.class_counts
+ local total = token.total
+ local ttl = tonumber(token.ttl)
local threshold = mean
- local total = spam + ham
local function set_ttl()
if expire < 0 then
@@ -310,14 +334,39 @@ local expiry_script = [[
return 0
end
- if total == 0 or math.abs(ham - spam) <= total * ${epsilon_common} then
+ -- Check if token is common (balanced across classes)
+ local is_common = false
+ if total == 0 then
+ is_common = true
+ else
+ -- For multi-class, check if any class dominates significantly
+ local max_count = 0
+ for _, count in pairs(class_counts) do
+ if count > max_count then
+ max_count = count
+ end
+ end
+ -- Token is common if no class has more than (1 - epsilon) of total
+ is_common = (max_count / total) <= (1 - ${epsilon_common})
+ end
+
+ if is_common then
common = common + 1
if ttl > ${common_ttl} then
discriminated = discriminated + 1
redis.call('EXPIRE', key, ${common_ttl})
end
elseif total >= threshold and total > 0 then
- if ham / total > ${significant_factor} or spam / total > ${significant_factor} then
+ -- Check if any class is significant
+ local is_significant = false
+ for _, count in pairs(class_counts) do
+ if count / total > ${significant_factor} then
+ is_significant = true
+ break
+ end
+ end
+
+ if is_significant then
significant = significant + 1
if ttl ~= -1 then
redis.call('PERSIST', key)
@@ -361,33 +410,50 @@ local expiry_script = [[
redis.call('DEL', lock_key)
local occ_distr = {}
- for _,cl in pairs({'ham', 'spam', 'total'}) do
+
+ -- Process all classes found plus total
+ local all_classes = {'total'}
+ for class_label in pairs(classes_found) do
+ table.insert(all_classes, class_label)
+ end
+
+ for _, cl in ipairs(all_classes) do
local occur_key = pattern_sha1 .. '_occurrence_' .. cl
if cursor ~= 0 then
- local n
- for i,v in ipairs(redis.call('HGETALL', occur_key)) do
- if i % 2 == 1 then
- n = tonumber(v)
- else
- occur[cl][n] = occur[cl][n] and occur[cl][n] + v or v
+ local existing_data = redis.call('HGETALL', occur_key)
+ if #existing_data > 0 then
+ for i = 1, #existing_data, 2 do
+ local bucket = tonumber(existing_data[i])
+ local count = tonumber(existing_data[i + 1])
+ if occur[cl] and occur[cl][bucket] then
+ occur[cl][bucket] = occur[cl][bucket] + count
+ elseif occur[cl] then
+ occur[cl][bucket] = count
+ end
end
end
- local str = ''
- if occur[cl][0] ~= nil then
- str = '0:' .. occur[cl][0] .. ','
- end
- for k,v in ipairs(occur[cl]) do
- if k == 20 then k = '>19' end
- str = str .. k .. ':' .. v .. ','
+ if occur[cl] and next(occur[cl]) then
+ local str = ''
+ if occur[cl][0] then
+ str = '0:' .. occur[cl][0] .. ','
+ end
+ for k = 1, 20 do
+ if occur[cl][k] then
+ local label = k == 20 and '>19' or tostring(k)
+ str = str .. label .. ':' .. occur[cl][k] .. ','
+ end
+ end
+ table.insert(occ_distr, cl .. '=' .. str)
+ else
+ table.insert(occ_distr, cl .. '=no_data')
end
- table.insert(occ_distr, str)
else
redis.call('DEL', occur_key)
end
- if next(occur[cl]) ~= nil then
+ if occur[cl] and next(occur[cl]) then
redis.call('HMSET', occur_key, unpack_function(hash2list(occur[cl])))
end
end
@@ -446,8 +512,8 @@ local function expire_step(cls, ev_base, worker)
'%s infrequent (%s %s), %s mean, %s std',
lutil.unpack(d))
if cycle then
- for i, cl in ipairs({ 'in ham', 'in spam', 'total' }) do
- logger.infox(rspamd_config, 'tokens occurrences, %s: {%s}', cl, occ_distr[i])
+ for _, distr_info in ipairs(occ_distr) do
+ logger.infox(rspamd_config, 'tokens occurrences: {%s}', distr_info)
end
end
end
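
The bayes_expiry.lua changes replace the fixed symbol_spam/symbol_ham pair with a per-classifier class_symbols table so that multi-class classifiers can be expired as well. A statfile may declare its class explicitly via a `class` attribute; otherwise the legacy spam/ham detection is kept for backward compatibility. A standalone sketch of that resolution logic (field names follow the diff; the sample call is illustrative):

    -- Sketch of the class resolution added to check_statfile_table()
    local function resolve_class(tbl, def_sym)
      local symbol = tbl.symbol or def_sym
      local class_name = tbl.class
      if not class_name then
        if tbl.spam ~= nil then
          class_name = tbl.spam and 'spam' or 'ham'
        elseif tostring(symbol):upper():match('SPAM') then
          class_name = 'spam'
        elseif tostring(symbol):upper():match('HAM') then
          class_name = 'ham'
        else
          class_name = def_sym
        end
      end
      return class_name, symbol
    end

    -- e.g. resolve_class({ symbol = 'BAYES_SPAM', spam = true }, nil)
    --   -> 'spam', 'BAYES_SPAM'
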
diff --git a/src/plugins/lua/contextal.lua b/src/plugins/lua/contextal.lua
new file mode 100644
index 000000000..e29c21645
--- /dev/null
+++ b/src/plugins/lua/contextal.lua
@@ -0,0 +1,338 @@
+--[[
+Copyright (c) 2025, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local E = {}
+local N = 'contextal'
+
+if confighelp then
+ return
+end
+
+local opts = rspamd_config:get_all_opt(N)
+if not opts then
+ return
+end
+
+local lua_redis = require "lua_redis"
+local lua_util = require "lua_util"
+local redis_cache = require "lua_cache"
+local rspamd_http = require "rspamd_http"
+local rspamd_logger = require "rspamd_logger"
+local rspamd_util = require "rspamd_util"
+local ts = require("tableshape").types
+local ucl = require "ucl"
+
+local cache_context, redis_params
+
+local contextal_actions = {
+ ['ALERT'] = true,
+ ['ALLOW'] = true,
+ ['BLOCK'] = true,
+ ['QUARANTINE'] = true,
+ ['SPAM'] = true,
+}
+
+local config_schema = lua_redis.enrich_schema {
+ action_symbol_prefix = ts.string:is_optional(),
+ base_url = ts.string:is_optional(),
+ cache_prefix = ts.string:is_optional(),
+ cache_timeout = ts.number:is_optional(),
+ cache_ttl = ts.number:is_optional(),
+ custom_actions = ts.array_of(ts.string):is_optional(),
+ defer_if_no_result = ts.boolean:is_optional(),
+ defer_message = ts.string:is_optional(),
+ enabled = ts.boolean:is_optional(),
+ http_timeout = ts.number:is_optional(),
+ request_ttl = ts.number:is_optional(),
+ submission_symbol = ts.string:is_optional(),
+}
+
+local settings = {
+ action_symbol_prefix = 'CONTEXTAL_ACTION',
+ base_url = 'http://localhost:8080',
+ cache_prefix = 'CXAL',
+ cache_timeout = 5,
+ cache_ttl = 3600,
+ custom_actions = {},
+ defer_if_no_result = false,
+ defer_message = 'Awaiting deep scan - try again later',
+ http_timeout = 2,
+ request_ttl = 4,
+ submission_symbol = 'CONTEXTAL_SUBMIT',
+}
+
+local static_boundary = rspamd_util.random_hex(32)
+local wait_request_ttl = true
+
+local function maybe_defer(task, obj)
+ if settings.defer_if_no_result and not ((obj or E)[1] or E).actions then
+ task:set_pre_result('soft reject', settings.defer_message, N)
+ end
+end
+
+local function process_actions(task, obj, is_cached)
+ lua_util.debugm(N, task, 'got result: %s (%s)', obj, is_cached and 'cached' or 'fresh')
+ for _, match in ipairs((obj[1] or E).actions or E) do
+ local act = match.action
+ local scenario = match.scenario
+ if not (act and scenario) then
+ rspamd_logger.err(task, 'bad result: %s', match)
+ elseif contextal_actions[act] then
+ task:insert_result(settings.action_symbol_prefix .. '_' .. act, 1.0, scenario)
+ else
+ rspamd_logger.err(task, 'unknown action: %s', act)
+ end
+ end
+
+ if not cache_context or is_cached then
+ maybe_defer(task, obj)
+ return
+ end
+
+ local cache_obj
+ if (obj[1] or E).actions then
+ cache_obj = {[1] = {["actions"] = obj[1].actions}}
+ else
+ local work_id = task:get_mempool():get_variable('contextal_work_id', 'string')
+ if work_id then
+ cache_obj = {[1] = {["work_id"] = work_id}}
+ else
+ rspamd_logger.err(task, 'no work id found in mempool')
+ return
+ end
+ end
+
+ redis_cache.cache_set(task,
+ task:get_digest(),
+ cache_obj,
+ cache_context)
+
+ maybe_defer(task, obj)
+end
+
+local function process_cached(task, obj)
+ if (obj[1] or E).actions then
+ lua_util.debugm(N, task, 'using cached actions: %s', obj[1].actions)
+ task:disable_symbol(settings.action_symbol_prefix)
+ return process_actions(task, obj, true)
+ elseif (obj[1] or E).work_id then
+ lua_util.debugm(N, task, 'using old work ID: %s', obj[1].work_id)
+ task:get_mempool():set_variable('contextal_work_id', obj[1].work_id)
+ else
+ rspamd_logger.err(task, 'bad result (cached): %s', obj)
+ end
+end
+
+local function action_cb(task)
+ local work_id = task:get_mempool():get_variable('contextal_work_id', 'string')
+ if not work_id then
+ rspamd_logger.err(task, 'no work id found in mempool')
+ return
+ end
+ lua_util.debugm(N, task, 'polling for result for work id: %s', work_id)
+
+ local function http_callback(err, code, body, hdrs)
+ if err then
+ rspamd_logger.err(task, 'http error: %s', err)
+ maybe_defer(task)
+ return
+ end
+ if code ~= 200 then
+ rspamd_logger.err(task, 'bad http code: %s', code)
+ maybe_defer(task)
+ return
+ end
+ local parser = ucl.parser()
+ local _, parse_err = parser:parse_string(body)
+ if parse_err then
+ rspamd_logger.err(task, 'cannot parse JSON: %s', err)
+ maybe_defer(task)
+ return
+ end
+ local obj = parser:get_object()
+ return process_actions(task, obj, false)
+ end
+
+ rspamd_http.request({
+ task = task,
+ url = settings.actions_url .. work_id,
+ callback = http_callback,
+ timeout = settings.http_timeout,
+ gzip = settings.gzip,
+ keepalive = settings.keepalive,
+ no_ssl_verify = settings.no_ssl_verify,
+ })
+end
+
+local function submit(task)
+
+ local function http_callback(err, code, body, hdrs)
+ if err then
+ rspamd_logger.err(task, 'http error: %s', err)
+ maybe_defer(task)
+ return
+ end
+ if code ~= 201 then
+ rspamd_logger.err(task, 'bad http code: %s', code)
+ maybe_defer(task)
+ return
+ end
+ local parser = ucl.parser()
+ local _, parse_err = parser:parse_string(body)
+ if parse_err then
+ rspamd_logger.err(task, 'cannot parse JSON: %s', err)
+ maybe_defer(task)
+ return
+ end
+ local obj = parser:get_object()
+ local work_id = obj.work_id
+ if work_id then
+ task:get_mempool():set_variable('contextal_work_id', work_id)
+ end
+ task:insert_result(settings.submission_symbol, 1.0,
+ string.format('work_id=%s', work_id or 'nil'))
+ if wait_request_ttl then
+ task:add_timer(settings.request_ttl, action_cb)
+ end
+ end
+
+ local req = {
+ object_data = {['data'] = task:get_content()},
+ }
+ if settings.request_ttl then
+ req.ttl = {['data'] = tostring(settings.request_ttl)}
+ end
+ if settings.max_recursion then
+ req.maxrec = {['data'] = tostring(settings.max_recursion)}
+ end
+ rspamd_http.request({
+ task = task,
+ url = settings.submit_url,
+ body = lua_util.table_to_multipart_body(req, static_boundary),
+ callback = http_callback,
+ headers = {
+ ['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary)
+ },
+ timeout = settings.http_timeout,
+ gzip = settings.gzip,
+ keepalive = settings.keepalive,
+ no_ssl_verify = settings.no_ssl_verify,
+ })
+end
+
+local function cache_hit(task, err, data)
+ if err then
+ rspamd_logger.err(task, 'error getting cache: %s', err)
+ else
+ process_cached(task, data)
+ end
+end
+
+local function submit_cb(task)
+ if cache_context then
+ redis_cache.cache_get(task,
+ task:get_digest(),
+ cache_context,
+ settings.cache_timeout,
+ submit,
+ cache_hit
+ )
+ else
+ submit(task)
+ end
+end
+
+local function set_url_path(base, path)
+ local slash = base:sub(#base) == '/' and '' or '/'
+ return base .. slash .. path
+end
+
+settings = lua_util.override_defaults(settings, opts)
+
+local res, err = config_schema:transform(settings)
+if not res then
+ rspamd_logger.warnx(rspamd_config, 'plugin %s is misconfigured: %s', N, err)
+ local err_msg = string.format("schema error: %s", res)
+ lua_util.config_utils.push_config_error(N, err_msg)
+ lua_util.disable_module(N, "failed", err_msg)
+ return
+end
+
+for _, k in ipairs(settings.custom_actions) do
+ contextal_actions[k] = true
+end
+
+if not settings.base_url then
+ if not (settings.submit_url and settings.actions_url) then
+ rspamd_logger.err(rspamd_config, 'no URL configured for contextal')
+ lua_util.disable_module(N, 'config')
+ return
+ end
+else
+ if not settings.submit_url then
+ settings.submit_url = set_url_path(settings.base_url, 'api/v1/submit')
+ end
+ if not settings.actions_url then
+ settings.actions_url = set_url_path(settings.base_url, 'api/v1/actions/')
+ end
+end
+
+redis_params = lua_redis.parse_redis_server(N)
+if redis_params then
+ cache_context = redis_cache.create_cache_context(redis_params, {
+ cache_prefix = settings.cache_prefix,
+ cache_ttl = settings.cache_ttl,
+ cache_format = 'json',
+ cache_use_hashing = false
+ })
+end
+
+local submission_id = rspamd_config:register_symbol({
+ name = settings.submission_symbol,
+ type = 'normal',
+ group = N,
+ callback = submit_cb
+})
+
+local top_options = rspamd_config:get_all_opt('options')
+if settings.request_ttl and settings.request_ttl >= (top_options.task_timeout * 0.8) then
+ rspamd_logger.info(rspamd_config, [[request ttl is >= 80% of task timeout, won't wait on processing]])
+ wait_request_ttl = false
+elseif not settings.request_ttl then
+ wait_request_ttl = false
+end
+
+local parent_id
+if wait_request_ttl then
+ parent_id = submission_id
+else
+ parent_id = rspamd_config:register_symbol({
+ name = settings.action_symbol_prefix,
+ type = 'postfilter',
+ priority = lua_util.symbols_priorities.high - 1,
+ group = N,
+ callback = action_cb
+ })
+end
+
+for k in pairs(contextal_actions) do
+ rspamd_config:register_symbol({
+ name = settings.action_symbol_prefix .. '_' .. k,
+ parent = parent_id,
+ type = 'virtual',
+ group = N,
+ })
+end
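
contextal.lua is a new plugin: it submits the raw message to a Contextal platform over HTTP multipart, records the returned work_id in the task mempool, and either polls the actions endpoint after request_ttl or defers polling to a postfilter when request_ttl would exceed 80% of task_timeout; results can optionally be cached in Redis via lua_cache. When only base_url is configured, the endpoints are derived with set_url_path(); a small sketch using the default base_url (the printed values follow from the code itself):

    -- How submit_url/actions_url are derived when not set explicitly
    local function set_url_path(base, path)
      local slash = base:sub(#base) == '/' and '' or '/'
      return base .. slash .. path
    end

    local base_url = 'http://localhost:8080'            -- default in settings
    print(set_url_path(base_url, 'api/v1/submit'))      -- http://localhost:8080/api/v1/submit
    print(set_url_path(base_url, 'api/v1/actions/'))    -- http://localhost:8080/api/v1/actions/
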
diff --git a/src/plugins/lua/elastic.lua b/src/plugins/lua/elastic.lua
index 8bed9fcf4..b26fbd8e8 100644
--- a/src/plugins/lua/elastic.lua
+++ b/src/plugins/lua/elastic.lua
@@ -19,6 +19,7 @@ local rspamd_logger = require 'rspamd_logger'
local rspamd_http = require "rspamd_http"
local lua_util = require "lua_util"
local rspamd_util = require "rspamd_util"
+local rspamd_text = require "rspamd_text"
local ucl = require "ucl"
local upstream_list = require "rspamd_upstream_list"
@@ -287,7 +288,7 @@ end
local function handle_error(action, component, limit)
if states[component]['errors'] >= limit then
rspamd_logger.errx(rspamd_config, 'cannot %s elastic %s, failed attempts: %s/%s, stop trying',
- action, component:gsub('_', ' '), states[component]['errors'], limit)
+ action, component:gsub('_', ' '), states[component]['errors'], limit)
states[component]['configured'] = true
else
states[component]['errors'] = states[component]['errors'] + 1
@@ -315,54 +316,6 @@ local function get_received_delay(received_headers)
return delay
end
-local function is_empty(str)
- -- define a pattern that includes invisible unicode characters
- local str_cleared = str:gsub('[' ..
- '\xC2\xA0' .. -- U+00A0 non-breaking space
- '\xE2\x80\x8B' .. -- U+200B zero width space
- '\xEF\xBB\xBF' .. -- U+FEFF byte order mark (zero width no-break space)
- '\xE2\x80\x8C' .. -- U+200C zero width non-joiner
- '\xE2\x80\x8D' .. -- U+200D zero width joiner
- '\xE2\x80\x8E' .. -- U+200E left-to-right mark
- '\xE2\x80\x8F' .. -- U+200F right-to-left mark
- '\xE2\x81\xA0' .. -- U+2060 word joiner
- '\xE2\x80\xAA' .. -- U+202A left-to-right embedding
- '\xE2\x80\xAB' .. -- U+202B right-to-left embedding
- '\xE2\x80\xAC' .. -- U+202C pop directional formatting
- '\xE2\x80\xAD' .. -- U+202D left-to-right override
- '\xE2\x80\xAE' .. -- U+202E right-to-left override
- '\xE2\x81\x9F' .. -- U+2061 function application
- '\xE2\x81\xA1' .. -- U+2061 invisible separator
- '\xE2\x81\xA2' .. -- U+2062 invisible times
- '\xE2\x81\xA3' .. -- U+2063 invisible separator
- '\xE2\x81\xA4' .. -- U+2064 invisible plus
- ']', '') -- gsub replaces all matched characters with an empty string
- if str_cleared:match('[%S]') then
- return false
- else
- return true
- end
-end
-
-local function fill_empty_strings(tbl, empty_value)
- local filled_tbl = {}
- for key, value in pairs(tbl) do
- if value and type(value) == 'table' then
- local nested_filtered = fill_empty_strings(value, empty_value)
- if next(nested_filtered) ~= nil then
- filled_tbl[key] = nested_filtered
- end
- elseif type(value) == 'boolean' then
- filled_tbl[key] = value
- elseif value and type(value) == 'string' and is_empty(value) then
- filled_tbl[key] = empty_value
- elseif value then
- filled_tbl[key] = value
- end
- end
- return filled_tbl
-end
-
local function create_bulk_json(es_index, logs_to_send)
local tbl = {}
for _, row in pairs(logs_to_send) do
@@ -407,15 +360,17 @@ local function elastic_send_data(flush_all, task, cfg, ev_base)
local function http_callback(err, code, body, _)
local push_done = false
if err then
- rspamd_logger.errx(log_object, 'cannot send logs to elastic (%s): %s; failed attempts: %s/%s',
- push_url, err, buffer['errors'], settings['limits']['max_fail'])
+ rspamd_logger.errx(log_object,
+ 'cannot send logs to elastic (%s): %s; failed attempts: %s/%s',
+ push_url, err, buffer['errors'], settings['limits']['max_fail'])
elseif code == 200 then
local parser = ucl.parser()
local res, ucl_err = parser:parse_string(body)
if not ucl_err and res then
local obj = parser:get_object()
push_done = true
- lua_util.debugm(N, log_object, 'successfully sent payload with %s logs', nlogs_to_send)
+ lua_util.debugm(N, log_object,
+ 'successfully sent payload with %s logs', nlogs_to_send)
if obj['errors'] then
for _, value in pairs(obj['items']) do
if value['index'] and value['index']['status'] >= 400 then
@@ -424,15 +379,15 @@ local function elastic_send_data(flush_all, task, cfg, ev_base)
local error_type = safe_get(value, 'index', 'error', 'type') or ''
local error_reason = safe_get(value, 'index', 'error', 'reason') or ''
rspamd_logger.warnx(log_object,
- 'error while pushing logs to elastic, status: %s, index: %s, type: %s, reason: %s',
- status, index, error_type, error_reason)
+ 'error while pushing logs to elastic, status: %s, index: %s, type: %s, reason: %s',
+ status, index, error_type, error_reason)
end
end
end
else
rspamd_logger.errx(log_object,
- 'cannot parse response from elastic (%s): %s; failed attempts: %s/%s',
- push_url, ucl_err, buffer['errors'], settings['limits']['max_fail'])
+ 'cannot parse response from elastic (%s): %s; failed attempts: %s/%s',
+ push_url, ucl_err, buffer['errors'], settings['limits']['max_fail'])
end
else
rspamd_logger.errx(log_object,
@@ -448,8 +403,8 @@ local function elastic_send_data(flush_all, task, cfg, ev_base)
upstream:fail()
if buffer['errors'] >= settings['limits']['max_fail'] then
rspamd_logger.errx(log_object,
- 'failed to send %s log lines, failed attempts: %s/%s, removing failed logs from bugger',
- nlogs_to_send, buffer['errors'], settings['limits']['max_fail'])
+ 'failed to send %s log lines, failed attempts: %s/%s, removing failed logs from bugger',
+ nlogs_to_send, buffer['errors'], settings['limits']['max_fail'])
buffer['logs']:pop_first(nlogs_to_send)
buffer['errors'] = 0
else
@@ -494,6 +449,7 @@ local function get_general_metadata(task)
local empty = settings['index_template']['empty_value']
local user = task:get_user()
r.rspamd_server = rspamd_hostname or empty
+ r.digest = task:get_digest() or empty
r.action = task:get_metric_action() or empty
r.score = task:get_metric_score()[1] or 0
@@ -565,8 +521,8 @@ local function get_general_metadata(task)
if task:has_from('smtp') then
local from = task:get_from({ 'smtp', 'orig' })[1]
if from and
- from['user'] and #from['user'] > 0 and
- from['domain'] and #from['domain'] > 0
+ from['user'] and #from['user'] > 0 and
+ from['domain'] and #from['domain'] > 0
then
r.from_user = from['user']
r.from_domain = from['domain']:lower()
@@ -578,8 +534,8 @@ local function get_general_metadata(task)
if task:has_from('mime') then
local mime_from = task:get_from({ 'mime', 'orig' })[1]
if mime_from and
- mime_from['user'] and #mime_from['user'] > 0 and
- mime_from['domain'] and #mime_from['domain'] > 0
+ mime_from['user'] and #mime_from['user'] > 0 and
+ mime_from['domain'] and #mime_from['domain'] > 0
then
r.mime_from_user = mime_from['user']
r.mime_from_domain = mime_from['domain']:lower()
@@ -608,25 +564,34 @@ local function get_general_metadata(task)
local function process_header(name)
local hdr = task:get_header_full(name)
- local headers_text_ignore_above = settings['index_template']['headers_text_ignore_above'] - 3
if hdr and #hdr > 0 then
local l = {}
for _, h in ipairs(hdr) do
- if settings['index_template']['headers_count_ignore_above'] ~= 0 and
- #l >= settings['index_template']['headers_count_ignore_above']
+ if settings['index_template']['headers_count_ignore_above'] > 0 and
+ #l >= settings['index_template']['headers_count_ignore_above']
then
table.insert(l, 'ignored above...')
break
end
local header
- if settings['index_template']['headers_text_ignore_above'] ~= 0 and
- h.decoded and #h.decoded >= headers_text_ignore_above
- then
- header = h.decoded:sub(1, headers_text_ignore_above) .. '...'
- elseif h.decoded and #h.decoded > 0 then
- header = h.decoded
+ local header_len
+ if h.decoded then
+ header = rspamd_text.fromstring(h.decoded)
+ header_len = header:len_utf8()
else
- header = empty
+ table.insert(l, empty)
+ break
+ end
+ if not header_len or header_len == 0 then
+ table.insert(l, empty)
+ break
+ end
+ if settings['index_template']['headers_text_ignore_above'] > 0 and
+ header_len >= settings['index_template']['headers_text_ignore_above']
+ then
+ header = header:sub_utf8(1, settings['index_template']['headers_text_ignore_above'])
+ table.insert(l, header .. rspamd_text.fromstring('...'))
+ break
end
table.insert(l, header)
end
@@ -686,7 +651,7 @@ local function get_general_metadata(task)
r.received_delay = get_received_delay(task:get_received_headers())
- return fill_empty_strings(r, empty)
+ return r
end
local function elastic_collect(task)
@@ -773,8 +738,8 @@ local function configure_geoip_pipeline(cfg, ev_base)
upstream:ok()
else
rspamd_logger.errx(rspamd_config,
- 'cannot configure elastic geoip pipeline (%s), status code: %s, response: %s',
- geoip_url, code, body)
+ 'cannot configure elastic geoip pipeline (%s), status code: %s, response: %s',
+ geoip_url, code, body)
upstream:fail()
handle_error('configure', 'geoip_pipeline', settings['limits']['max_fail'])
end
@@ -810,8 +775,9 @@ local function put_index_policy(cfg, ev_base, upstream, host, policy_url, index_
states['index_policy']['configured'] = true
upstream:ok()
else
- rspamd_logger.errx(rspamd_config, 'cannot configure elastic index policy (%s), status code: %s, response: %s',
- policy_url, code, body)
+ rspamd_logger.errx(rspamd_config,
+ 'cannot configure elastic index policy (%s), status code: %s, response: %s',
+ policy_url, code, body)
upstream:fail()
handle_error('configure', 'index_policy', settings['limits']['max_fail'])
end
@@ -867,7 +833,7 @@ local function get_index_policy(cfg, ev_base, upstream, host, policy_url, index_
if not lua_util.table_cmp(our_policy['policy']['default_state'], current_default_state) then
update_needed = true
elseif not lua_util.table_cmp(our_policy['policy']['ism_template'][1]['index_patterns'],
- current_ism_index_patterns) then
+ current_ism_index_patterns) then
update_needed = true
elseif not lua_util.table_cmp(our_policy['policy']['states'], current_states) then
update_needed = true
@@ -890,8 +856,8 @@ local function get_index_policy(cfg, ev_base, upstream, host, policy_url, index_
put_index_policy(cfg, ev_base, upstream, host, policy_url, index_policy_json)
else
rspamd_logger.errx(rspamd_config,
- 'current elastic index policy (%s) not returned correct seq_no/primary_term, policy will not be updated, response: %s',
- policy_url, body)
+ 'current elastic index policy (%s) not returned correct seq_no/primary_term, policy will not be updated, response: %s',
+ policy_url, body)
upstream:fail()
handle_error('validate current', 'index_policy', settings['limits']['max_fail'])
end
@@ -909,8 +875,8 @@ local function get_index_policy(cfg, ev_base, upstream, host, policy_url, index_
end
else
rspamd_logger.errx(rspamd_config,
- 'cannot get current elastic index policy (%s), status code: %s, response: %s',
- policy_url, code, body)
+ 'cannot get current elastic index policy (%s), status code: %s, response: %s',
+ policy_url, code, body)
handle_error('get current', 'index_policy', settings['limits']['max_fail'])
upstream:fail()
end
@@ -1037,7 +1003,7 @@ local function configure_index_policy(cfg, ev_base)
}
index_policy['policy']['phases']['delete'] = delete_obj
end
- -- opensearch state policy with hot state
+ -- opensearch state policy with hot state
elseif detected_distro['name'] == 'opensearch' then
local retry = {
count = 3,
@@ -1313,6 +1279,7 @@ local function configure_index_template(cfg, ev_base)
type = 'object',
properties = {
rspamd_server = t_keyword,
+ digest = t_keyword,
action = t_keyword,
score = t_double,
symbols = symbols_obj,
@@ -1381,7 +1348,7 @@ local function configure_index_template(cfg, ev_base)
upstream:ok()
else
rspamd_logger.errx(rspamd_config, 'cannot configure elastic index template (%s), status code: %s, response: %s',
- template_url, code, body)
+ template_url, code, body)
upstream:fail()
handle_error('configure', 'index_template', settings['limits']['max_fail'])
end
@@ -1424,8 +1391,9 @@ local function verify_distro(manual)
local supported_distro_info = supported_distro[detected_distro_name]
-- check that detected_distro_version is valid
if not detected_distro_version or type(detected_distro_version) ~= 'string' then
- rspamd_logger.errx(rspamd_config, 'elastic version should be a string, but we received: %s',
- type(detected_distro_version))
+ rspamd_logger.errx(rspamd_config,
+ 'elastic version should be a string, but we received: %s',
+ type(detected_distro_version))
valid = false
elseif detected_distro_version == '' then
rspamd_logger.errx(rspamd_config, 'unsupported elastic version: empty string')
@@ -1434,21 +1402,22 @@ local function verify_distro(manual)
-- compare versions using compare_versions
local cmp_from = compare_versions(detected_distro_version, supported_distro_info['from'])
if cmp_from == -1 then
- rspamd_logger.errx(rspamd_config, 'unsupported elastic version: %s, minimal supported version of %s is %s',
- detected_distro_version, detected_distro_name, supported_distro_info['from'])
+ rspamd_logger.errx(rspamd_config,
+ 'unsupported elastic version: %s, minimal supported version of %s is %s',
+ detected_distro_version, detected_distro_name, supported_distro_info['from'])
valid = false
else
local cmp_till = compare_versions(detected_distro_version, supported_distro_info['till'])
if (cmp_till >= 0) and not supported_distro_info['till_unknown'] then
rspamd_logger.errx(rspamd_config,
- 'unsupported elastic version: %s, maximum supported version of %s is less than %s',
- detected_distro_version, detected_distro_name, supported_distro_info['till'])
+ 'unsupported elastic version: %s, maximum supported version of %s is less than %s',
+ detected_distro_version, detected_distro_name, supported_distro_info['till'])
valid = false
elseif (cmp_till >= 0) and supported_distro_info['till_unknown'] then
rspamd_logger.warnx(rspamd_config,
- 'compatibility of elastic version: %s is unknown, maximum known supported version of %s is less than %s,' ..
- 'use at your own risk',
- detected_distro_version, detected_distro_name, supported_distro_info['till'])
+ 'compatibility of elastic version: %s is unknown, maximum known ' ..
+ 'supported version of %s is less than %s, use at your own risk',
+ detected_distro_version, detected_distro_name, supported_distro_info['till'])
valid_unknown = true
end
end
@@ -1460,11 +1429,12 @@ local function verify_distro(manual)
else
if valid and manual then
rspamd_logger.infox(
- rspamd_config, 'assuming elastic distro: %s, version: %s', detected_distro_name, detected_distro_version)
+ rspamd_config, 'assuming elastic distro: %s, version: %s', detected_distro_name, detected_distro_version)
detected_distro['supported'] = true
elseif valid and not manual then
- rspamd_logger.infox(rspamd_config, 'successfully connected to elastic distro: %s, version: %s',
- detected_distro_name, detected_distro_version)
+ rspamd_logger.infox(rspamd_config,
+ 'successfully connected to elastic distro: %s, version: %s',
+ detected_distro_name, detected_distro_version)
detected_distro['supported'] = true
else
handle_error('configure', 'distro', settings['version']['autodetect_max_fail'])
@@ -1477,7 +1447,7 @@ local function configure_distro(cfg, ev_base)
detected_distro['name'] = settings['version']['override']['name']
detected_distro['version'] = settings['version']['override']['version']
rspamd_logger.infox(rspamd_config,
- 'automatic detection of elastic distro and version is disabled, taking configuration from settings')
+ 'automatic detection of elastic distro and version is disabled, taking configuration from settings')
verify_distro(true)
end
@@ -1490,14 +1460,16 @@ local function configure_distro(cfg, ev_base)
rspamd_logger.errx(rspamd_config, 'cannot connect to elastic (%s): %s', root_url, err)
upstream:fail()
elseif code ~= 200 then
- rspamd_logger.errx(rspamd_config, 'cannot connect to elastic (%s), status code: %s, response: %s', root_url, code,
- body)
+ rspamd_logger.errx(rspamd_config,
+ 'cannot connect to elastic (%s), status code: %s, response: %s',
+ root_url, code, body)
upstream:fail()
else
local parser = ucl.parser()
local res, ucl_err = parser:parse_string(body)
if not res then
- rspamd_logger.errx(rspamd_config, 'failed to parse reply from elastic (%s): %s', root_url, ucl_err)
+ rspamd_logger.errx(rspamd_config, 'failed to parse reply from elastic (%s): %s',
+ root_url, ucl_err)
upstream:fail()
else
local obj = parser:get_object()
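
The elastic.lua changes above add a digest field to the indexed document, drop the is_empty/fill_empty_strings post-processing, and make header truncation UTF-8 aware by going through rspamd_text instead of byte-based string slicing. A sketch of the truncation step in isolation (the limit comes from index_template.headers_text_ignore_above; requires the rspamd Lua environment):

    local rspamd_text = require "rspamd_text"

    -- Truncate a decoded header to `limit` UTF-8 characters, as in process_header()
    local function truncate_header(decoded, limit)
      local header = rspamd_text.fromstring(decoded)
      if limit > 0 and header:len_utf8() >= limit then
        return header:sub_utf8(1, limit) .. rspamd_text.fromstring('...')
      end
      return header
    end
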
diff --git a/src/plugins/lua/fuzzy_collect.lua b/src/plugins/lua/fuzzy_collect.lua
index 132ace90c..060cc2fc2 100644
--- a/src/plugins/lua/fuzzy_collect.lua
+++ b/src/plugins/lua/fuzzy_collect.lua
@@ -34,7 +34,7 @@ local settings = {
local function send_data_mirror(m, cfg, ev_base, body)
local function store_callback(err, _, _, _)
if err then
- rspamd_logger.errx(cfg, 'cannot save data on %(%s): %s', m.server, m.name, err)
+ rspamd_logger.errx(cfg, 'cannot save data on %s(%s): %s', m.server, m.name, err)
else
rspamd_logger.infox(cfg, 'saved data on %s(%s)', m.server, m.name)
end
diff --git a/src/plugins/lua/gpt.lua b/src/plugins/lua/gpt.lua
index e4a77c6dd..331dbbce2 100644
--- a/src/plugins/lua/gpt.lua
+++ b/src/plugins/lua/gpt.lua
@@ -15,13 +15,14 @@ limitations under the License.
]] --
local N = "gpt"
+local REDIS_PREFIX = "rsllm"
local E = {}
if confighelp then
rspamd_config:add_example(nil, 'gpt',
- "Performs postfiltering using GPT model",
- [[
-gpt {
+ "Performs postfiltering using GPT model",
+ [[
+ gpt {
# Supported types: openai, ollama
type = "openai";
# Your key to access the API
@@ -48,9 +49,11 @@ gpt {
allow_passthrough = false;
# Check messages that are apparent ham (no action and negative score)
allow_ham = false;
- # default send response_format field { type = "json_object" }
- include_response_format = true,
-}
+ # Add header with reason (null to disable)
+ reason_header = "X-GPT-Reason";
+ # Use JSON format for response
+ json = false;
+ }
]])
return
end
@@ -59,8 +62,10 @@ local lua_util = require "lua_util"
local rspamd_http = require "rspamd_http"
local rspamd_logger = require "rspamd_logger"
local lua_mime = require "lua_mime"
+local lua_redis = require "lua_redis"
local ucl = require "ucl"
local fun = require "fun"
+local lua_cache = require "lua_cache"
-- Exclude checks if one of those is found
local default_symbols_to_except = {
@@ -73,6 +78,32 @@ local default_symbols_to_except = {
BOUNCE = -1,
}
+local default_extra_symbols = {
+ GPT_MARKETING = {
+ score = 0.0,
+ description = 'GPT model detected marketing content',
+ category = 'marketing',
+ },
+ GPT_PHISHING = {
+ score = 3.0,
+ description = 'GPT model detected phishing content',
+ category = 'phishing',
+ },
+ GPT_SCAM = {
+ score = 3.0,
+ description = 'GPT model detected scam content',
+ category = 'scam',
+ },
+ GPT_MALWARE = {
+ score = 3.0,
+ description = 'GPT model detected malware content',
+ category = 'malware',
+ },
+}
+
+-- Should be filled from extra symbols
+local categories_map = {}
+
local settings = {
type = 'openai',
api_key = nil,
@@ -83,11 +114,18 @@ local settings = {
prompt = nil,
condition = nil,
autolearn = false,
+ reason_header = nil,
url = 'https://api.openai.com/v1/chat/completions',
- symbols_to_except = default_symbols_to_except,
+ symbols_to_except = nil,
+ symbols_to_trigger = nil, -- Exclude/include logic
allow_passthrough = false,
allow_ham = false,
+ json = false,
+ extra_symbols = nil,
+ cache_prefix = REDIS_PREFIX,
}
+local redis_params
+local cache_context
local function default_condition(task)
-- Check result
@@ -110,22 +148,44 @@ local function default_condition(task)
return false, 'negative score, already decided as ham'
end
end
- -- We also exclude some symbols
- for s, required_weight in pairs(settings.symbols_to_except) do
- if task:has_symbol(s) then
- if required_weight > 0 then
- -- Also check score
- local sym = task:get_symbol(s) or E
- -- Must exist as we checked it before with `has_symbol`
- if sym.weight then
- if math.abs(sym.weight) >= required_weight then
- return false, 'skip as "' .. s .. '" is found (weight: ' .. sym.weight .. ')'
+
+ if settings.symbols_to_except then
+ for s, required_weight in pairs(settings.symbols_to_except) do
+ if task:has_symbol(s) then
+ if required_weight > 0 then
+ -- Also check score
+ local sym = task:get_symbol(s) or E
+ -- Must exist as we checked it before with `has_symbol`
+ if sym.weight then
+ if math.abs(sym.weight) >= required_weight then
+ return false, 'skip as "' .. s .. '" is found (weight: ' .. sym.weight .. ')'
+ end
end
+ lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s,
+ sym.weight, required_weight)
+ else
+ return false, 'skip as "' .. s .. '" is found'
end
- lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s,
+ end
+ end
+ end
+ if settings.symbols_to_trigger then
+ for s, required_weight in pairs(settings.symbols_to_trigger) do
+ if task:has_symbol(s) then
+ if required_weight > 0 then
+ -- Also check score
+ local sym = task:get_symbol(s) or E
+ -- Must exist as we checked it before with `has_symbol`
+ if sym.weight then
+ if math.abs(sym.weight) < required_weight then
+ return false, 'skip as "' .. s .. '" is found with low weight (weight: ' .. sym.weight .. ')'
+ end
+ end
+ lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s,
sym.weight, required_weight)
+ end
else
- return false, 'skip as "' .. s .. '" is found'
+ return false, 'skip as "' .. s .. '" is not found'
end
end
end
@@ -149,10 +209,10 @@ local function default_condition(task)
local words = sel_part:get_words('norm')
nwords = #words
if nwords > settings.max_tokens then
- return true, table.concat(words, ' ', 1, settings.max_tokens)
+ return true, table.concat(words, ' ', 1, settings.max_tokens), sel_part
end
end
- return true, sel_part:get_content_oneline()
+ return true, sel_part:get_content_oneline(), sel_part
end
local function maybe_extract_json(str)
@@ -193,7 +253,16 @@ local function maybe_extract_json(str)
return nil
end
-local function default_conversion(task, input)
+-- Helper function to remove <think>...</think> and trim leading newlines
+local function clean_gpt_response(text)
+ -- Remove <think>...</think> including multiline
+ text = text:gsub("<think>.-</think>", "")
+ -- Trim leading whitespace and newlines
+ text = text:gsub("^%s*\n*", "")
+ return text
+end
+
+local function default_openai_json_conversion(task, input)
local parser = ucl.parser()
local res, err = parser:parse_string(input)
if not res then
@@ -241,7 +310,7 @@ local function default_conversion(task, input)
elseif reply.probability == "low" then
spam_score = 0.1
else
- rspamd_logger.infox("cannot convert to spam probability: %s", reply.probability)
+ rspamd_logger.infox(task, "cannot convert to spam probability: %s", reply.probability)
end
end
@@ -249,14 +318,111 @@ local function default_conversion(task, input)
rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
end
- return spam_score
+ return spam_score, reply.reason, {}
end
rspamd_logger.errx(task, 'cannot convert spam score: %s', first_message)
return
end
-local function ollama_conversion(task, input)
+-- Remove what we don't need
+local function clean_reply_line(line)
+ if not line then
+ return ''
+ end
+ return lua_util.str_trim(line):gsub("^%d%.%s+", "")
+end
+
+-- Assume that we have 3 lines: probability, reason, additional symbols
+local function default_openai_plain_conversion(task, input)
+ local parser = ucl.parser()
+ local res, err = parser:parse_string(input)
+ if not res then
+ rspamd_logger.errx(task, 'cannot parse reply: %s', err)
+ return
+ end
+ local reply = parser:get_object()
+ if not reply then
+ rspamd_logger.errx(task, 'cannot get object from reply')
+ return
+ end
+
+ if type(reply.choices) ~= 'table' or type(reply.choices[1]) ~= 'table' then
+ rspamd_logger.errx(task, 'no choices in reply')
+ return
+ end
+
+ local first_message = reply.choices[1].message.content
+
+ if not first_message then
+ rspamd_logger.errx(task, 'no content in the first message')
+ return
+ end
+
+ -- Clean message
+ first_message = clean_gpt_response(first_message)
+
+ local lines = lua_util.str_split(first_message, '\n')
+ local first_line = clean_reply_line(lines[1])
+ local spam_score = tonumber(first_line)
+ local reason = clean_reply_line(lines[2])
+ local categories = lua_util.str_split(clean_reply_line(lines[3]), ',')
+
+ if type(reply.usage) == 'table' then
+ rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
+ end
+
+ if spam_score then
+ return spam_score, reason, categories
+ end
+
+ rspamd_logger.errx(task, 'cannot parse plain gpt reply: %s (all: %s)', lines[1], first_message)
+ return
+end
+
+local function default_ollama_plain_conversion(task, input)
+ local parser = ucl.parser()
+ local res, err = parser:parse_string(input)
+ if not res then
+ rspamd_logger.errx(task, 'cannot parse reply: %s', err)
+ return
+ end
+ local reply = parser:get_object()
+ if not reply then
+ rspamd_logger.errx(task, 'cannot get object from reply')
+ return
+ end
+
+ if type(reply.message) ~= 'table' then
+ rspamd_logger.errx(task, 'bad message in reply')
+ return
+ end
+
+ local first_message = reply.message.content
+
+ if not first_message then
+ rspamd_logger.errx(task, 'no content in the first message')
+ return
+ end
+
+ -- Clean message
+ first_message = clean_gpt_response(first_message)
+
+ local lines = lua_util.str_split(first_message, '\n')
+ local first_line = clean_reply_line(lines[1])
+ local spam_score = tonumber(first_line)
+ local reason = clean_reply_line(lines[2])
+ local categories = lua_util.str_split(clean_reply_line(lines[3]), ',')
+
+ if spam_score then
+ return spam_score, reason, categories
+ end
+
+ rspamd_logger.errx(task, 'cannot parse plain gpt reply: %s (all: %s)', lines[1], first_message)
+ return
+end
+
+local function default_ollama_json_conversion(task, input)
local parser = ucl.parser()
local res, err = parser:parse_string(input)
if not res then
@@ -304,7 +470,7 @@ local function ollama_conversion(task, input)
elseif reply.probability == "low" then
spam_score = 0.1
else
- rspamd_logger.infox("cannot convert to spam probability: %s", reply.probability)
+ rspamd_logger.infox(task, "cannot convert to spam probability: %s", reply.probability)
end
end
@@ -312,13 +478,126 @@ local function ollama_conversion(task, input)
rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
end
- return spam_score
+ return spam_score, reply.reason
end
rspamd_logger.errx(task, 'cannot convert spam score: %s', first_message)
return
end
+-- Make the cache key depend on the prompt, model and url settings to avoid conflicts between configurations
+local env_digest = nil
+
+local function redis_cache_key(sel_part)
+ if not env_digest then
+ local hasher = require "rspamd_cryptobox_hash"
+ local digest = hasher.create()
+ digest:update(settings.prompt)
+ digest:update(settings.model)
+ digest:update(settings.url)
+ env_digest = digest:hex():sub(1, 4)
+ end
+ return string.format('%s_%s', env_digest,
+ sel_part:get_mimepart():get_digest():sub(1, 24))
+end
+
+local function process_categories(task, categories)
+ for _, category in ipairs(categories) do
+ local sym = categories_map[category:lower()]
+ if sym then
+ task:insert_result(sym.name, 1.0)
+ end
+ end
+end
+
+local function insert_results(task, result, sel_part)
+ if not result.probability then
+ rspamd_logger.errx(task, 'no probability in result')
+ return
+ end
+
+ if result.probability > 0.5 then
+ task:insert_result('GPT_SPAM', (result.probability - 0.5) * 2, tostring(result.probability))
+ if settings.autolearn then
+ task:set_flag("learn_spam")
+ end
+
+ if result.categories then
+ process_categories(task, result.categories)
+ end
+ else
+ task:insert_result('GPT_HAM', (0.5 - result.probability) * 2, tostring(result.probability))
+ if settings.autolearn then
+ task:set_flag("learn_ham")
+ end
+ if result.categories then
+ process_categories(task, result.categories)
+ end
+ end
+ if result.reason and settings.reason_header then
+ lua_mime.modify_headers(task,
+ { add = { [settings.reason_header] = { value = tostring(result.reason), order = 1 } } })
+ end
+
+ if cache_context then
+ lua_cache.cache_set(task, redis_cache_key(sel_part), result, cache_context)
+ end
+end
+
+local function check_consensus_and_insert_results(task, results, sel_part)
+ for _, result in ipairs(results) do
+ if not result.checked then
+ return
+ end
+ end
+
+ local nspam, nham = 0, 0
+  local max_spam_prob, max_ham_prob = 0, 1.0 -- max_ham_prob tracks the lowest spam probability among ham votes
+ local reasons = {}
+
+ for _, result in ipairs(results) do
+ if result.success then
+ if result.probability > 0.5 then
+ nspam = nspam + 1
+ max_spam_prob = math.max(max_spam_prob, result.probability)
+ lua_util.debugm(N, task, "model: %s; spam: %s; reason: '%s'",
+ result.model, result.probability, result.reason)
+ else
+ nham = nham + 1
+ max_ham_prob = math.min(max_ham_prob, result.probability)
+ lua_util.debugm(N, task, "model: %s; ham: %s; reason: '%s'",
+ result.model, result.probability, result.reason)
+ end
+
+ if result.reason then
+ table.insert(reasons, result)
+ end
+ end
+ end
+
+ lua_util.shuffle(reasons)
+  -- A result may come back without a reason, so guard the dereferences below
+  local reason = reasons[1]
+
+  if nspam > nham and max_spam_prob > 0.75 then
+    insert_results(task, {
+        probability = max_spam_prob,
+        reason = reason and reason.reason,
+        categories = reason and reason.categories,
+      },
+      sel_part)
+  elseif nham > nspam and max_ham_prob < 0.25 then
+    insert_results(task, {
+        probability = max_ham_prob,
+        reason = reason and reason.reason,
+        categories = reason and reason.categories,
+      },
+      sel_part)
+ else
+ -- No consensus
+ lua_util.debugm(N, task, "no consensus")
+ end
+end
+
local function get_meta_llm_content(task)
local url_content = "Url domains: no urls found"
if task:has_urls() then
@@ -336,57 +615,70 @@ local function get_meta_llm_content(task)
return url_content, from_content
end
-local function default_llm_check(task)
- local ret, content = settings.condition(task)
+local function check_llm_uncached(task, content, sel_part)
+ return settings.specific_check(task, content, sel_part)
+end
- if not ret then
- rspamd_logger.info(task, "skip checking gpt as the condition is not met: %s", content)
- return
- end
+local function check_llm_cached(task, content, sel_part)
+ local cache_key = redis_cache_key(sel_part)
- if not content then
- lua_util.debugm(N, task, "no content to send to gpt classification")
- return
- end
+ lua_cache.cache_get(task, cache_key, cache_context, settings.timeout * 1.5, function()
+ check_llm_uncached(task, content, sel_part)
+ end, function(_, err, data)
+ if err then
+ rspamd_logger.errx(task, 'cannot get cache: %s', err)
+      check_llm_uncached(task, content, sel_part)
+      return
+    end
+ if data then
+ rspamd_logger.infox(task, 'found cached response %s', cache_key)
+ insert_results(task, data, sel_part)
+ else
+ check_llm_uncached(task, content, sel_part)
+ end
+ end)
+end
+
+local function openai_check(task, content, sel_part)
lua_util.debugm(N, task, "sending content to gpt: %s", content)
local upstream
- local function on_reply(err, code, body)
+ local results = {}
- if err then
- rspamd_logger.errx(task, 'request failed: %s', err)
- upstream:fail()
- return
- end
+ local function gen_reply_closure(model, idx)
+ return function(err, code, body)
+ results[idx].checked = true
+ if err then
+ rspamd_logger.errx(task, '%s: request failed: %s', model, err)
+ upstream:fail()
+ check_consensus_and_insert_results(task, results, sel_part)
+ return
+ end
- upstream:ok()
- lua_util.debugm(N, task, "got reply: %s", body)
- if code ~= 200 then
- rspamd_logger.errx(task, 'bad reply: %s', body)
- return
- end
+ upstream:ok()
+ lua_util.debugm(N, task, "%s: got reply: %s", model, body)
+ if code ~= 200 then
+ rspamd_logger.errx(task, 'bad reply: %s', body)
+ return
+ end
- local reply = settings.reply_conversion(task, body)
- if not reply then
- return
- end
+ local reply, reason, categories = settings.reply_conversion(task, body)
- if reply > 0.75 then
- task:insert_result('GPT_SPAM', (reply - 0.75) * 4, tostring(reply))
- if settings.autolearn then
- task:set_flag("learn_spam")
- end
- elseif reply < 0.25 then
- task:insert_result('GPT_HAM', (0.25 - reply) * 4, tostring(reply))
- if settings.autolearn then
- task:set_flag("learn_ham")
+ results[idx].model = model
+
+ if reply then
+ results[idx].success = true
+ results[idx].probability = reply
+ results[idx].reason = reason
+
+ if categories then
+ results[idx].categories = categories
+ end
end
- else
- lua_util.debugm(N, task, "uncertain result: %s", reply)
- end
+ check_consensus_and_insert_results(task, results, sel_part)
+ end
end
local from_content, url_content = get_meta_llm_content(task)
@@ -402,7 +694,7 @@ local function default_llm_check(task)
},
{
role = 'user',
- content = 'Subject: ' .. task:get_subject() or '',
+ content = 'Subject: ' .. (task:get_subject() or ''),
},
{
role = 'user',
@@ -424,81 +716,82 @@ local function default_llm_check(task)
body.response_format = { type = "json_object" }
end
- upstream = settings.upstreams:get_upstream_round_robin()
- local http_params = {
- url = settings.url,
- mime_type = 'application/json',
- timeout = settings.timeout,
- log_obj = task,
- callback = on_reply,
- headers = {
- ['Authorization'] = 'Bearer ' .. settings.api_key,
- },
- keepalive = true,
- body = ucl.to_format(body, 'json-compact', true),
- task = task,
- upstream = upstream,
- use_gzip = true,
- }
-
- rspamd_http.request(http_params)
-end
-
-local function ollama_check(task)
- local ret, content = settings.condition(task)
-
- if not ret then
- rspamd_logger.info(task, "skip checking gpt as the condition is not met: %s", content)
- return
+ if type(settings.model) == 'string' then
+ settings.model = { settings.model }
end
- if not content then
- lua_util.debugm(N, task, "no content to send to gpt classification")
- return
+ upstream = settings.upstreams:get_upstream_round_robin()
+ for idx, model in ipairs(settings.model) do
+ results[idx] = {
+ success = false,
+ checked = false
+ }
+ body.model = model
+ local http_params = {
+ url = settings.url,
+ mime_type = 'application/json',
+ timeout = settings.timeout,
+ log_obj = task,
+ callback = gen_reply_closure(model, idx),
+ headers = {
+ ['Authorization'] = 'Bearer ' .. settings.api_key,
+ },
+ keepalive = true,
+ body = ucl.to_format(body, 'json-compact', true),
+ task = task,
+ upstream = upstream,
+ use_gzip = true,
+ }
+
+ if not rspamd_http.request(http_params) then
+ results[idx].checked = true
+ end
end
+end
+local function ollama_check(task, content, sel_part)
lua_util.debugm(N, task, "sending content to gpt: %s", content)
local upstream
+ local results = {}
+
+ local function gen_reply_closure(model, idx)
+ return function(err, code, body)
+ results[idx].checked = true
+ if err then
+ rspamd_logger.errx(task, '%s: request failed: %s', model, err)
+ upstream:fail()
+ check_consensus_and_insert_results(task, results, sel_part)
+ return
+ end
- local function on_reply(err, code, body)
-
- if err then
- rspamd_logger.errx(task, 'request failed: %s', err)
- upstream:fail()
- return
- end
+ upstream:ok()
+ lua_util.debugm(N, task, "%s: got reply: %s", model, body)
+ if code ~= 200 then
+ rspamd_logger.errx(task, 'bad reply: %s', body)
+ return
+ end
- upstream:ok()
- lua_util.debugm(N, task, "got reply: %s", body)
- if code ~= 200 then
- rspamd_logger.errx(task, 'bad reply: %s', body)
- return
- end
+ local reply, reason = settings.reply_conversion(task, body)
- local reply = settings.reply_conversion(task, body)
- if not reply then
- return
- end
+ results[idx].model = model
- if reply > 0.75 then
- task:insert_result('GPT_SPAM', (reply - 0.75) * 4, tostring(reply))
- if settings.autolearn then
- task:set_flag("learn_spam")
+ if reply then
+ results[idx].success = true
+ results[idx].probability = reply
+ results[idx].reason = reason
end
- elseif reply < 0.25 then
- task:insert_result('GPT_HAM', (0.25 - reply) * 4, tostring(reply))
- if settings.autolearn then
- task:set_flag("learn_ham")
- end
- else
- lua_util.debugm(N, task, "uncertain result: %s", reply)
- end
+ check_consensus_and_insert_results(task, results, sel_part)
+ end
end
local from_content, url_content = get_meta_llm_content(task)
+ if type(settings.model) == 'string' then
+ settings.model = { settings.model }
+ end
+
local body = {
stream = false,
model = settings.model,
@@ -528,55 +821,91 @@ local function ollama_check(task)
}
}
- -- Conditionally add response_format
- if settings.include_response_format then
- body.response_format = { type = "json_object" }
- end
+ for i, model in ipairs(settings.model) do
+ -- Conditionally add response_format
+ if settings.include_response_format then
+ body.response_format = { type = "json_object" }
+ end
- upstream = settings.upstreams:get_upstream_round_robin()
- local http_params = {
- url = settings.url,
- mime_type = 'application/json',
- timeout = settings.timeout,
- log_obj = task,
- callback = on_reply,
- keepalive = true,
- body = ucl.to_format(body, 'json-compact', true),
- task = task,
- upstream = upstream,
- use_gzip = true,
- }
+ results[i] = {
+ success = false,
+ checked = false
+ }
+ body.model = model
+
+ upstream = settings.upstreams:get_upstream_round_robin()
+ local http_params = {
+ url = settings.url,
+ mime_type = 'application/json',
+ timeout = settings.timeout,
+ log_obj = task,
+ callback = gen_reply_closure(model, i),
+ keepalive = true,
+ body = ucl.to_format(body, 'json-compact', true),
+ task = task,
+ upstream = upstream,
+ use_gzip = true,
+ }
- rspamd_http.request(http_params)
+ rspamd_http.request(http_params)
+ end
end
local function gpt_check(task)
- return settings.specific_check(task)
+ local ret, content, sel_part = settings.condition(task)
+
+ if not ret then
+    rspamd_logger.infox(task, "skip checking gpt as the condition is not met: %s", content)
+ return
+ end
+
+ if not content then
+ lua_util.debugm(N, task, "no content to send to gpt classification")
+ return
+ end
+
+ if sel_part then
+ -- Check digest
+ check_llm_cached(task, content, sel_part)
+ else
+ check_llm_uncached(task, content)
+ end
end
local types_map = {
openai = {
- check = default_llm_check,
+ check = openai_check,
condition = default_condition,
- conversion = default_conversion,
+ conversion = function(is_json)
+ return is_json and default_openai_json_conversion or default_openai_plain_conversion
+ end,
require_passkey = true,
},
ollama = {
check = ollama_check,
condition = default_condition,
- conversion = ollama_conversion,
+ conversion = function(is_json)
+ return is_json and default_ollama_json_conversion or default_ollama_plain_conversion
+ end,
require_passkey = false,
},
}
-local opts = rspamd_config:get_all_opt('gpt')
+local opts = rspamd_config:get_all_opt(N)
if opts then
+ redis_params = lua_redis.parse_redis_server(N, opts)
settings = lua_util.override_defaults(settings, opts)
- if not settings.prompt then
- settings.prompt = "You will be provided with the email message, subject, from and url domains, " ..
- "and your task is to evaluate the probability to be spam as number from 0 to 1, " ..
- "output result as JSON with 'probability' field."
+ if redis_params then
+ cache_context = lua_cache.create_cache_context(redis_params, settings, N)
+ end
+
+ if not settings.symbols_to_except then
+ settings.symbols_to_except = default_symbols_to_except
+ end
+
+ if not settings.extra_symbols then
+ settings.extra_symbols = default_extra_symbols
end
local llm_type = types_map[settings.type]
@@ -596,7 +925,7 @@ if opts then
if settings.reply_conversion then
settings.reply_conversion = load(settings.reply_conversion)()
else
- settings.reply_conversion = llm_type.conversion
+ settings.reply_conversion = llm_type.conversion(settings.json)
end
if not settings.api_key and llm_type.require_passkey then
@@ -620,7 +949,7 @@ if opts then
name = 'GPT_SPAM',
type = 'virtual',
parent = id,
- score = 5.0,
+ score = 3.0,
})
rspamd_config:register_symbol({
name = 'GPT_HAM',
@@ -628,4 +957,35 @@ if opts then
parent = id,
score = -2.0,
})
+
+ if settings.extra_symbols then
+ for sym, data in pairs(settings.extra_symbols) do
+ rspamd_config:register_symbol({
+ name = sym,
+ type = 'virtual',
+ parent = id,
+ score = data.score,
+ description = data.description,
+ })
+ data.name = sym
+ categories_map[data.category] = data
+ end
+ end
+
+ if not settings.prompt then
+ if settings.extra_symbols then
+ settings.prompt = "Analyze this email strictly as a spam detector given the email message, subject, " ..
+ "FROM and url domains. Evaluate spam probability (0-1). " ..
+ "Output ONLY 3 lines:\n" ..
+ "1. Numeric score (0.00-1.00)\n" ..
+ "2. One-sentence reason citing whether it is spam, the strongest red flag, or why it is ham\n" ..
+ "3. Primary concern category if found from the list: " .. table.concat(lua_util.keys(categories_map), ', ')
+ else
+ settings.prompt = "Analyze this email strictly as a spam detector given the email message, subject, " ..
+ "FROM and url domains. Evaluate spam probability (0-1). " ..
+ "Output ONLY 2 lines:\n" ..
+ "1. Numeric score (0.00-1.00)\n" ..
+ "2. One-sentence reason citing whether it is spam, the strongest red flag, or why it is ham\n"
+ end
+ end
end
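The plain (non-JSON) conversions added above expect the model to answer with up to three lines: a numeric score, a one-sentence reason and an optional comma-separated category list. A minimal sketch of that contract, where the sample reply text is invented and the parsing simply mirrors default_openai_plain_conversion:

-- hypothetical model output following the 3-line prompt defined at the end of gpt.lua
local sample_reply = "0.85\nUrgent payment request with a mismatched sender domain\nphishing"

local lines = lua_util.str_split(clean_gpt_response(sample_reply), '\n')
local spam_score = tonumber(clean_reply_line(lines[1]))                 -- 0.85
local reason = clean_reply_line(lines[2])                               -- free-text explanation
local categories = lua_util.str_split(clean_reply_line(lines[3]), ',') -- { 'phishing' }

insert_results() then maps scores above 0.5 to GPT_SPAM and the rest to GPT_HAM, and each returned category is looked up in categories_map built from settings.extra_symbols.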
diff --git a/src/plugins/lua/greylist.lua b/src/plugins/lua/greylist.lua
index e4a633233..934e17bce 100644
--- a/src/plugins/lua/greylist.lua
+++ b/src/plugins/lua/greylist.lua
@@ -122,6 +122,29 @@ local function data_key(task)
local h = hash.create()
h:update(body, len)
+ local subject = task:get_subject() or ''
+ h:update(subject)
+
+ -- Take recipients into account
+ local rcpt = task:get_recipients('smtp')
+ if rcpt then
+ table.sort(rcpt, function(r1, r2)
+ return r1['addr'] < r2['addr']
+ end)
+
+ fun.each(function(r)
+ h:update(r['addr'])
+ end, rcpt)
+ end
+
+  -- Also take the sender into account, using the MIME From address
+ local from = task:get_from('mime')
+
+ local addr = '<>'
+ if from and from[1] then
+ addr = from[1]['addr']
+ end
+ h:update(addr)
local b32 = settings['key_prefix'] .. 'b' .. h:base32():sub(1, 20)
task:get_mempool():set_variable("grey_bodyhash", b32)
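With the change above the greylisting data key is no longer derived from the body alone: the subject, the sorted SMTP recipients and the MIME From address are mixed into the same hash. A minimal sketch of what now feeds the key (the literal values are examples; the hash object is the same rspamd_cryptobox_hash API used above):

local hash = require "rspamd_cryptobox_hash"
local h = hash.create()
h:update("... message body ...")    -- body, as before
h:update("Invoice #1234")           -- subject
h:update("a@example.org")           -- SMTP recipients, sorted by address
h:update("b@example.org")
h:update("sender@example.com")      -- MIME From address, or '<>' when it is absent
local key = 'greylist' .. 'b' .. h:base32():sub(1, 20) -- 'greylist' stands in for settings['key_prefix']

As a result, messages that share a body but target different recipients (or carry a different subject) now hit separate greylist records.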
diff --git a/src/plugins/lua/hfilter.lua b/src/plugins/lua/hfilter.lua
index 6bc011b83..32102e4f8 100644
--- a/src/plugins/lua/hfilter.lua
+++ b/src/plugins/lua/hfilter.lua
@@ -131,6 +131,7 @@ local checks_hellohost = [[
/modem[.-][0-9]/i 5
/[0-9][.-]?dhcp/i 5
/wifi[.-][0-9]/i 5
+/[.-]vps[.-]/i 1
]]
local checks_hellohost_map
@@ -199,9 +200,10 @@ local function check_regexp(str, regexp_text)
return re:match(str)
end
-local function add_static_map(data)
+local function add_static_map(data, description)
return rspamd_config:add_map {
type = 'regexp_multi',
+ description = description,
url = {
upstreams = 'static',
data = data,
@@ -568,16 +570,16 @@ local function append_t(t, a)
end
end
if config['helo_enabled'] then
- checks_hello_bareip_map = add_static_map(checks_hello_bareip)
- checks_hello_badip_map = add_static_map(checks_hello_badip)
- checks_hellohost_map = add_static_map(checks_hellohost)
- checks_hello_map = add_static_map(checks_hello)
+ checks_hello_bareip_map = add_static_map(checks_hello_bareip, 'Hfilter: HELO bare ip')
+ checks_hello_badip_map = add_static_map(checks_hello_badip, 'Hfilter: HELO bad ip')
+ checks_hellohost_map = add_static_map(checks_hellohost, 'Hfilter: HELO host')
+ checks_hello_map = add_static_map(checks_hello, 'Hfilter: HELO')
append_t(symbols_enabled, symbols_helo)
timeout = math.max(timeout, rspamd_config:get_dns_timeout() * 3)
end
if config['hostname_enabled'] then
if not checks_hellohost_map then
- checks_hellohost_map = add_static_map(checks_hellohost)
+ checks_hellohost_map = add_static_map(checks_hellohost, 'Hfilter: HOSTNAME')
end
append_t(symbols_enabled, symbols_hostname)
timeout = math.max(timeout, rspamd_config:get_dns_timeout())
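The new hellohost entry follows the same map format as the neighbouring lines: /regexp/flags followed by its weight. For illustration (the HELO value is invented):

-- map line added above:      /[.-]vps[.-]/i 1
-- example HELO that matches: customer-vps-12.provider.example
-- it contributes weight 1, a much softer penalty than the modem/dhcp/wifi patterns scored at 5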
diff --git a/src/plugins/lua/history_redis.lua b/src/plugins/lua/history_redis.lua
index a3fdb0ec4..44eb40ad9 100644
--- a/src/plugins/lua/history_redis.lua
+++ b/src/plugins/lua/history_redis.lua
@@ -138,7 +138,7 @@ end
local function history_save(task)
local function redis_llen_cb(err, _)
if err then
- rspamd_logger.errx(task, 'got error %s when writing history row: %s',
+ rspamd_logger.errx(task, 'got error %s when writing history row',
err)
end
end
@@ -188,7 +188,7 @@ local function handle_history_request(task, conn, from, to, reset)
if reset then
local function redis_ltrim_cb(err, _)
if err then
- rspamd_logger.errx(task, 'got error %s when resetting history: %s',
+ rspamd_logger.errx(task, 'got error %s when resetting history',
err)
conn:send_error(504, '{"error": "' .. err .. '"}')
else
@@ -258,7 +258,7 @@ local function handle_history_request(task, conn, from, to, reset)
(rspamd_util:get_ticks() - t1) * 1000.0)
collectgarbage()
else
- rspamd_logger.errx(task, 'got error %s when getting history: %s',
+ rspamd_logger.errx(task, 'got error %s when getting history',
err)
conn:send_error(504, '{"error": "' .. err .. '"}')
end
diff --git a/src/plugins/lua/known_senders.lua b/src/plugins/lua/known_senders.lua
index 5cb2ddcf5..0cbf3cdcf 100644
--- a/src/plugins/lua/known_senders.lua
+++ b/src/plugins/lua/known_senders.lua
@@ -106,21 +106,26 @@ local function configure_scripts(_, _, _)
-- script checks if given recipients are in the local replies set of the sender
local redis_zscore_script = [[
local replies_recipients_addrs = ARGV
- if replies_recipients_addrs then
+ if replies_recipients_addrs and #replies_recipients_addrs > 0 then
+ local found = false
for _, rcpt in ipairs(replies_recipients_addrs) do
local score = redis.call('ZSCORE', KEYS[1], rcpt)
- -- check if score is nil (for some reason redis script does not see if score is a nil value)
- if type(score) == 'boolean' then
- score = nil
- -- 0 is stand for failure code
- return 0
+ if score then
+ -- If we found at least one recipient, consider it a match
+ found = true
+ break
end
end
- -- first number in return statement is stands for the success/failure code
- -- where success code is 1 and failure code is 0
- return 1
+
+ if found then
+ -- Success code is 1
+ return 1
+ else
+ -- Failure code is 0
+ return 0
+ end
else
- -- 0 is a failure code
+ -- No recipients to check, failure code is 0
return 0
end
]]
@@ -259,7 +264,13 @@ local function verify_local_replies_set(task)
return nil
end
- local replies_recipients = task:get_recipients('mime') or E
+ local replies_recipients = task:get_recipients('smtp') or E
+
+ -- If no recipients, don't proceed
+ if #replies_recipients == 0 then
+ lua_util.debugm(N, task, 'No recipients to verify')
+ return nil
+ end
local replies_sender_string = lua_util.maybe_obfuscate_string(tostring(replies_sender), settings,
settings.sender_prefix)
@@ -268,13 +279,16 @@ local function verify_local_replies_set(task)
local function redis_zscore_script_cb(err, data)
if err ~= nil then
rspamd_logger.errx(task, 'Could not verify %s local replies set %s', replies_sender_key, err)
- end
- if data ~= 1 then
- lua_util.debugm(N, task, 'Recipients were not verified')
return
end
- lua_util.debugm(N, task, 'Recipients were verified')
- task:insert_result(settings.symbol_check_mail_local, 1.0, replies_sender_key)
+
+ -- We need to ensure we're properly checking the result
+ if data == 1 then
+ lua_util.debugm(N, task, 'Recipients were verified')
+ task:insert_result(settings.symbol_check_mail_local, 1.0, replies_sender_key)
+ else
+ lua_util.debugm(N, task, 'Recipients were not verified, data=%s', data)
+ end
end
local replies_recipients_addrs = {}
@@ -284,12 +298,24 @@ local function verify_local_replies_set(task)
table.insert(replies_recipients_addrs, replies_recipients[i].addr)
end
- lua_util.debugm(N, task, 'Making redis request to local replies set')
- lua_redis.exec_redis_script(zscore_script_id,
+ -- Only proceed if we have recipients to check
+ if #replies_recipients_addrs == 0 then
+ lua_util.debugm(N, task, 'No recipient addresses to verify')
+ return nil
+ end
+
+ lua_util.debugm(N, task, 'Making redis request to local replies set with key %s and recipients %s',
+ replies_sender_key, table.concat(replies_recipients_addrs, ", "))
+
+ local ret = lua_redis.exec_redis_script(zscore_script_id,
{ task = task, is_write = true },
redis_zscore_script_cb,
{ replies_sender_key },
replies_recipients_addrs)
+
+ if not ret then
+ rspamd_logger.errx(task, "redis script request wasn't scheduled")
+ end
end
local function check_known_incoming_mail_callback(task)
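The rewritten script above now succeeds as soon as any SMTP recipient has a score in the sender's local replies sorted set. A hedged illustration of the exchange, where the key name and the addresses are examples rather than the exact values the plugin derives from sender_prefix:

-- ZADD <sender_replies_set> <timestamp> bob@example.org  -- stored when the sender previously replied to bob
-- EVALSHA <zscore_script> 1 <sender_replies_set> bob@example.org carol@example.net
--   -> 1: at least one recipient is present, so symbol_check_mail_local is inserted for the task
-- EVALSHA <zscore_script> 1 <sender_replies_set> dave@example.io
--   -> 0: no recipient found (or no recipients at all), and the symbol is not inserted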
diff --git a/src/plugins/lua/milter_headers.lua b/src/plugins/lua/milter_headers.lua
index 2daeeed78..17fc90562 100644
--- a/src/plugins/lua/milter_headers.lua
+++ b/src/plugins/lua/milter_headers.lua
@@ -138,7 +138,7 @@ local function milter_headers(task)
local function skip_wanted(hdr)
if settings_override then
- return true
+ return false
end
-- Normal checks
local function match_extended_headers_rcpt()
diff --git a/src/plugins/lua/mime_types.lua b/src/plugins/lua/mime_types.lua
index c69fa1e7b..73cd63c6a 100644
--- a/src/plugins/lua/mime_types.lua
+++ b/src/plugins/lua/mime_types.lua
@@ -128,6 +128,7 @@ local settings = {
inf = 4,
its = 4,
jnlp = 4,
+ ['library-ms'] = 4,
lnk = 4,
ksh = 4,
mad = 4,
@@ -179,6 +180,7 @@ local settings = {
reg = 4,
scf = 4,
scr = 4,
+ ['search-ms'] = 4,
shs = 4,
theme = 4,
url = 4,
@@ -406,9 +408,9 @@ local function check_mime_type(task)
local score2 = check_tables(ext2)
-- Check if detected extension match real extension
if detected_ext and detected_ext == ext then
- check_extension(score1, nil)
+ check_extension(score1, nil)
else
- check_extension(score1, score2)
+ check_extension(score1, score2)
end
-- Check for archive cloaking like .zip.gz
if settings['archive_extensions'][ext2]
diff --git a/src/plugins/lua/multimap.lua b/src/plugins/lua/multimap.lua
index a61da606b..8bb62bef1 100644
--- a/src/plugins/lua/multimap.lua
+++ b/src/plugins/lua/multimap.lua
@@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-]]--
+]] --
if confighelp then
return
@@ -34,6 +34,16 @@ local redis_params
local fun = require "fun"
local N = 'multimap'
+-- SpamAssassin-like functionality
+local sa_atoms = {}
+local sa_scores = {}
+local sa_meta_rules = {}
+local sa_descriptions = {}
+
+-- Symbol state tracking for graceful map reloads
+-- States: 'available', 'loading', 'orphaned'
+local regexp_rules_symbol_states = {}
+
local multimap_grammar
-- Parse result in form: <symbol>:<score>|<symbol>|<score>
local function parse_multimap_value(parse_rule, p_ret)
@@ -54,7 +64,7 @@ local function parse_multimap_value(parse_rule, p_ret)
-- Matches: 55.97, -90.8, .9
number.decimal = (number.integer * -- Integer
(number.fractional ^ -1)) + -- Fractional
- (lpeg.S("+-") * number.fractional) -- Completely fractional number
+ (lpeg.S("+-") * number.fractional) -- Completely fractional number
local sym_start = lpeg.R("az", "AZ") + lpeg.S("_")
local sym_elt = sym_start + lpeg.R("09")
@@ -100,6 +110,607 @@ local function parse_multimap_value(parse_rule, p_ret)
return false, nil, 0.0, {}
end
+-- SpamAssassin-like line processing functions
+local function split_sa_line(str)
+ local result = {}
+ if not str then
+ return result
+ end
+
+ for token in string.gmatch(str, '%S+') do
+ table.insert(result, token)
+ end
+
+ return result
+end
+
+local function parse_sa_regexp(rule_symbol, re_expr)
+ -- Extract regexp and flags from /regexp/flags format
+  local re_str, flags = string.match(re_expr, '^/(.+)/([gimxsu]*)$')
+  if not re_str then
+    re_str, flags = string.match(re_expr, '^m{(.+)}([gimxsu]*)$')
+ end
+ if not re_str then
+ -- Try without delimiters
+ re_str = re_expr
+ flags = ''
+ end
+
+ if flags and flags ~= '' then
+ re_str = '(?' .. flags .. ')' .. re_str
+ end
+
+ local re = rspamd_regexp.create(re_str)
+ if not re then
+ rspamd_logger.errx(rspamd_config, 'cannot create regexp for %s: %s', rule_symbol, re_expr)
+ return nil
+ end
+
+ return re
+end
+
+local function words_to_sa_re(words, start)
+ return table.concat(fun.totable(fun.drop_n(start, words)), " ")
+end
+
+-- Helper function to create SA rule callbacks
+local function create_sa_atom_function(name, re, match_type, opts)
+ return function(task)
+ if not re then
+ rspamd_logger.errx(task, 're is missing for atom %s', name)
+ return 0
+ end
+
+    local function process_re_match(re_obj, tsk, re_type, header, strong)
+      -- task:process_regexp() expects a single table of parameters
+      return tsk:process_regexp({
+        re = re_obj,
+        type = re_type,
+        header = header,
+        strong = strong,
+      })
+    end
+
+ local ret = 0
+
+ if match_type == 'header' then
+ ret = process_re_match(re, task, 'header', opts.header, opts.strong or false)
+ elseif match_type == 'body' then
+ ret = process_re_match(re, task, 'sabody')
+ elseif match_type == 'rawbody' then
+ ret = process_re_match(re, task, 'sarawbody')
+ elseif match_type == 'full' then
+ ret = process_re_match(re, task, 'body')
+ elseif match_type == 'uri' then
+ ret = process_re_match(re, task, 'url')
+ else
+ -- Default to body
+ ret = process_re_match(re, task, 'sabody')
+ end
+
+ if opts and opts.negate then
+ -- Negate the result for !~ operators
+ ret = (ret > 0) and 0 or 1
+ lua_util.debugm(N, task, 'SA atom %s negated result: %s', name, ret)
+ end
+
+ lua_util.debugm(N, task, 'SA atom %s result: %s', name, ret)
+ return ret
+ end
+end
+
+local function process_sa_line(rule, line)
+ line = lua_util.str_trim(line)
+
+ if string.len(line) == 0 or string.sub(line, 1, 1) == '#' then
+ return
+ end
+
+ -- Add debug logging
+ lua_util.debugm(N, rspamd_config, 'Processing SA line for rule %s: %s', rule.symbol, line)
+
+ local words = split_sa_line(line)
+ if not words or #words == 0 then
+ lua_util.debugm(N, rspamd_config, 'Skipping empty or invalid line: %s', line)
+ return
+ end
+
+ local rule_name = rule.symbol
+ local scope_name = rule.scope_name or rule_name
+
+ -- All regexps for this SA-style rule are registered in a dedicated scope
+ -- This allows clean removal and replacement when the map is reloaded
+
+ if words[1] == 'header' then
+ -- header SYMBOL Header =~ /regexp/flags
+ if #words >= 4 and (words[4] == '=~' or words[4] == '!~') then
+ local atom_name = words[2]
+ local header_name = words[3]
+ local re_expr = words_to_sa_re(words, 4)
+
+ -- Skip =~ or !~
+ re_expr = string.gsub(re_expr, '^[!=]~%s*', '')
+
+ local re = parse_sa_regexp(atom_name, re_expr)
+ if re then
+ -- Register regexp with cache in specific scope
+ rspamd_config:register_regexp_scoped(scope_name, {
+ re = re,
+ type = 'header',
+ header = header_name,
+ pcre_only = false,
+ })
+
+ re:set_limit(0) -- No limit
+ re:set_max_hits(1)
+
+ local negate = (words[4] == '!~')
+ sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'header', {
+ header = header_name,
+ strong = false,
+ negate = negate
+ })
+
+ -- Track atom state
+ regexp_rules_symbol_states[atom_name] = {
+ state = 'loading',
+ rule_name = rule_name,
+ type = 'atom'
+ }
+
+ lua_util.debugm(N, rspamd_config, 'added SA header atom: %s for header %s (scope: %s)',
+ atom_name, header_name, scope_name)
+ end
+ end
+ elseif words[1] == 'body' then
+ -- body SYMBOL /regexp/flags
+ if #words >= 3 then
+ local atom_name = words[2]
+ local re_expr = words_to_sa_re(words, 2)
+
+ local re = parse_sa_regexp(atom_name, re_expr)
+ if re then
+ rspamd_config:register_regexp_scoped(scope_name, {
+ re = re,
+ type = 'sabody',
+ pcre_only = false,
+ })
+
+ re:set_limit(0)
+ re:set_max_hits(1)
+
+ sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'body', {})
+
+ -- Track atom state
+ regexp_rules_symbol_states[atom_name] = {
+ state = 'loading',
+ rule_name = rule_name,
+ type = 'atom'
+ }
+
+ lua_util.debugm(N, rspamd_config, 'added SA body atom: %s (scope: %s)', atom_name, scope_name)
+ end
+ end
+ elseif words[1] == 'rawbody' then
+ -- rawbody SYMBOL /regexp/flags
+ if #words >= 3 then
+ local atom_name = words[2]
+ local re_expr = words_to_sa_re(words, 2)
+
+ local re = parse_sa_regexp(atom_name, re_expr)
+ if re then
+ rspamd_config:register_regexp_scoped(scope_name, {
+ re = re,
+ type = 'sarawbody',
+ pcre_only = false,
+ })
+
+ re:set_limit(0)
+ re:set_max_hits(1)
+
+ sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'rawbody', {})
+
+ -- Track atom state
+ regexp_rules_symbol_states[atom_name] = {
+ state = 'loading',
+ rule_name = rule_name,
+ type = 'atom'
+ }
+
+ lua_util.debugm(N, rspamd_config, 'added SA rawbody atom: %s (scope: %s)', atom_name, scope_name)
+ end
+ end
+ elseif words[1] == 'uri' then
+ -- uri SYMBOL /regexp/flags
+ if #words >= 3 then
+ local atom_name = words[2]
+ local re_expr = words_to_sa_re(words, 2)
+
+ local re = parse_sa_regexp(atom_name, re_expr)
+ if re then
+ rspamd_config:register_regexp_scoped(scope_name, {
+ re = re,
+ type = 'url',
+ pcre_only = false,
+ })
+
+ re:set_limit(0)
+ re:set_max_hits(1)
+
+ sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'uri', {})
+
+ -- Track atom state
+ regexp_rules_symbol_states[atom_name] = {
+ state = 'loading',
+ rule_name = rule_name,
+ type = 'atom'
+ }
+
+ lua_util.debugm(N, rspamd_config, 'added SA uri atom: %s (scope: %s)', atom_name, scope_name)
+ end
+ end
+ elseif words[1] == 'full' then
+ -- full SYMBOL /regexp/flags
+ if #words >= 3 then
+ local atom_name = words[2]
+ local re_expr = words_to_sa_re(words, 2)
+
+ local re = parse_sa_regexp(atom_name, re_expr)
+ if re then
+ rspamd_config:register_regexp_scoped(scope_name, {
+ re = re,
+ type = 'body',
+ pcre_only = false,
+ })
+
+ re:set_limit(0)
+ re:set_max_hits(1)
+
+ sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'full', {})
+
+ -- Track atom state
+ regexp_rules_symbol_states[atom_name] = {
+ state = 'loading',
+ rule_name = rule_name,
+ type = 'atom'
+ }
+
+ lua_util.debugm(N, rspamd_config, 'added SA full atom: %s (scope: %s)', atom_name, scope_name)
+ end
+ end
+ elseif words[1] == 'meta' then
+ -- meta SYMBOL expression
+ if #words >= 3 then
+ local meta_name = words[2]
+ local meta_expr = words_to_sa_re(words, 2)
+
+ sa_meta_rules[meta_name] = {
+ symbol = meta_name,
+ expression = meta_expr,
+ rule_name = rule_name
+ }
+
+ -- Track symbol state
+ regexp_rules_symbol_states[meta_name] = {
+ state = 'loading',
+ rule_name = rule_name,
+ type = 'meta'
+ }
+
+ lua_util.debugm(N, rspamd_config, 'added SA meta rule: %s = %s', meta_name, meta_expr)
+ end
+ elseif words[1] == 'score' then
+ -- score SYMBOL value
+ if #words >= 3 then
+ local score_symbol = words[2]
+ local score_value = tonumber(words[3])
+
+ if score_value then
+ sa_scores[score_symbol] = score_value
+ lua_util.debugm(N, rspamd_config, 'added SA score: %s = %s', score_symbol, score_value)
+ end
+ end
+ elseif words[1] == 'describe' then
+ -- describe SYMBOL description text
+ if #words >= 3 then
+ local desc_symbol = words[2]
+ local desc_text = words_to_sa_re(words, 2)
+
+ sa_descriptions[desc_symbol] = desc_text
+ lua_util.debugm(N, rspamd_config, 'added SA description: %s = %s', desc_symbol, desc_text)
+ end
+ end
+end
+
+local function parse_sa_atom(str)
+ local atom = table.concat(fun.totable(fun.take_while(function(c)
+ if string.find(', \t()><+!|&\n', c, 1, true) then
+ return false
+ end
+ return true
+ end, fun.iter(str))), '')
+
+ return atom
+end
+
+-- Forward declaration for mutual recursion
+local create_sa_meta_callback
+
+local function gen_sa_process_atom_cb(task, rule_name)
+ return function(atom)
+ -- Check symbol state first
+ local state_info = regexp_rules_symbol_states[atom]
+ if state_info then
+ if state_info.state == 'orphaned' or state_info.state == 'loading' then
+ -- Double-check by looking at scope loaded state
+ local scope_loaded = false
+ for _, rule in ipairs(rules) do
+ if rule.symbol == state_info.rule_name and rule.scope_name then
+ scope_loaded = rspamd_config:is_regexp_scope_loaded(rule.scope_name)
+ break
+ end
+ end
+
+        if scope_loaded and (state_info.type == 'atom' and sa_atoms[atom]) then
+          -- Update state to available if scope is loaded and atom exists
+          local prev_state = state_info.state
+          state_info.state = 'available'
+          lua_util.debugm(N, task, 'regexp_rules atom %s was %s, but scope is loaded - marking as available',
+            atom, prev_state)
+ else
+ lua_util.debugm(N, task, 'regexp_rules atom %s is %s, returning 0', atom, state_info.state)
+ return 0
+ end
+ end
+ end
+
+ local atom_cb = sa_atoms[atom]
+
+ if atom_cb then
+ local res = atom_cb(task)
+
+ -- Return result without logging each atom
+ return res
+ else
+ -- Check if this is a SA meta rule
+ local meta_rule = sa_meta_rules[atom]
+ if meta_rule then
+ local meta_cb = create_sa_meta_callback(meta_rule)
+ local res = meta_cb(task)
+ return res or 0
+ end
+
+ -- External atom - check if task has this symbol
+ if task:has_symbol(atom) then
+ return 1
+ end
+ end
+ return 0
+ end
+end
+
+create_sa_meta_callback = function(meta_rule)
+ return function(task)
+ -- Check symbol state before execution
+ local state_info = regexp_rules_symbol_states[meta_rule.symbol]
+ if state_info then
+ if state_info.state == 'orphaned' or state_info.state == 'loading' then
+ -- Double-check by looking at scope loaded state
+ local scope_loaded = false
+ for _, rule in ipairs(rules) do
+ if rule.symbol == state_info.rule_name and rule.scope_name then
+ scope_loaded = rspamd_config:is_regexp_scope_loaded(rule.scope_name)
+ break
+ end
+ end
+
+        if scope_loaded and sa_meta_rules[meta_rule.symbol] then
+          -- Update state to available if scope is loaded and meta rule exists
+          local prev_state = state_info.state
+          state_info.state = 'available'
+          lua_util.debugm(N, task, 'regexp_rules meta %s was %s, but scope is loaded - marking as available',
+            meta_rule.symbol, prev_state)
+ else
+ lua_util.debugm(N, task, 'regexp_rules meta %s is %s, skipping execution',
+ meta_rule.symbol, state_info.state)
+ return 0
+ end
+ end
+ end
+
+ local cached = task:cache_get('sa_multimap_metas_processed')
+
+ if not cached then
+ cached = {}
+ task:cache_set('sa_multimap_metas_processed', cached)
+ end
+
+ local function exclude_sym_filter(sopt)
+      -- Exclude the meta symbol itself (the leading __ is only stripped from trace entries below)
+ return sopt ~= meta_rule.symbol
+ end
+
+ local already_processed = cached[meta_rule.symbol]
+
+ if not (already_processed and already_processed['default']) then
+ local expression = rspamd_expression.create(meta_rule.expression,
+ parse_sa_atom,
+ rspamd_config:get_mempool())
+ if not expression then
+ rspamd_logger.errx(rspamd_config, 'Cannot parse SA meta expression: %s', meta_rule.expression)
+ return
+ end
+
+ local function exec_symbol(cur_res)
+ local res, trace = expression:process_traced(gen_sa_process_atom_cb(task, meta_rule.rule_name))
+
+ if res > 0 then
+ local filtered_trace = fun.totable(fun.take_n(5,
+ fun.map(function(elt)
+ return elt:gsub('^__', '')
+ end, fun.filter(exclude_sym_filter, trace))))
+ lua_util.debugm(N, task, 'SA meta %s matched with result: %s; trace %s; filtered trace %s',
+ meta_rule.symbol, res, trace, filtered_trace)
+ task:insert_result_named(cur_res, meta_rule.symbol, 1.0, filtered_trace)
+ end
+
+ if not cached[meta_rule.symbol] then
+ cached[meta_rule.symbol] = {}
+ end
+ cached[meta_rule.symbol][cur_res] = res
+
+ return res
+ end
+
+      -- Invoke for all named results
+      local named_results = task:get_all_named_results()
+      for _, cur_res in ipairs(named_results) do
+        exec_symbol(cur_res)
+      end
+
+      -- Return the 'default' result so this meta can also be evaluated as an atom of another expression
+      return cached[meta_rule.symbol] and cached[meta_rule.symbol]['default'] or 0
+    else
+      -- We have cached the result
+      local res = already_processed['default'] or 0
+      lua_util.debugm(N, task, 'cached SA meta result for %s: %s', meta_rule.symbol, res)
+      return res
+    end
+ end
+end
+
+-- Initialize SA meta rules after all atoms are processed
+local function finalize_sa_rules()
+ lua_util.debugm(N, rspamd_config, 'Finalizing SA rules - processing %s meta rules',
+ fun.length(sa_meta_rules))
+
+ for meta_name, meta_rule in pairs(sa_meta_rules) do
+ local score = sa_scores[meta_name] or 1.0
+ local description = sa_descriptions[meta_name] or ('multimap symbol ' .. meta_name)
+
+ lua_util.debugm(N, rspamd_config, 'Registering SA meta rule %s (score: %s, expression: %s)',
+ meta_name, score, meta_rule.expression)
+
+ local id = rspamd_config:register_symbol({
+ name = meta_name,
+ weight = score,
+ callback = create_sa_meta_callback(meta_rule),
+ type = 'normal',
+ flags = 'one_shot',
+ augmentations = {},
+ })
+
+ lua_util.debugm(N, rspamd_config, 'Successfully registered SA meta symbol %s with id %s (callback attached)',
+ meta_name, id)
+
+ rspamd_config:set_metric_symbol({
+ name = meta_name,
+ score = score,
+ description = description,
+ group = N,
+ })
+
+ -- Also register meta rule as an atom so it can be used in other meta expressions
+ sa_atoms[meta_name] = create_sa_meta_callback(meta_rule)
+
+ -- Mark symbol as available
+ if regexp_rules_symbol_states[meta_name] then
+ regexp_rules_symbol_states[meta_name].state = 'available'
+ else
+ regexp_rules_symbol_states[meta_name] = {
+ state = 'available',
+ rule_name = meta_rule.rule_name,
+ type = 'meta'
+ }
+ end
+
+ lua_util.debugm(N, rspamd_config, 'registered SA meta symbol: %s (score: %s)',
+ meta_name, score)
+ end
+
+ -- Mark orphaned symbols - only check meta symbols (not atoms) since atoms are just expression parts
+ for symbol, state_info in pairs(regexp_rules_symbol_states) do
+ if state_info.type == 'meta' and state_info.state == 'available' and not sa_meta_rules[symbol] then
+ state_info.state = 'orphaned'
+ state_info.orphaned_at = os.time()
+ lua_util.debugm(N, rspamd_config, 'marked regexp_rules symbol %s as orphaned', symbol)
+ end
+ end
+
+ lua_util.debugm(N, rspamd_config, 'SA rules finalization complete: registered %s meta rules with callbacks',
+ fun.length(sa_meta_rules))
+end
+
+-- Helper function to get regexp_rules symbol state statistics (only meta symbols, not atoms)
+local function get_regexp_rules_symbol_stats()
+ local stats = {
+ available = 0,
+ loading = 0,
+ orphaned = 0,
+ total = 0
+ }
+
+ for _, state_info in pairs(regexp_rules_symbol_states) do
+ if state_info.type == 'meta' then
+ stats[state_info.state] = (stats[state_info.state] or 0) + 1
+ stats.total = stats.total + 1
+ end
+ end
+
+ return stats
+end
+
+-- Helper function to synchronize symbol states with loaded scopes
+local function sync_regexp_rules_symbol_states()
+ lua_util.debugm(N, rspamd_config, 'Synchronizing regexp_rules symbol states with loaded scopes')
+
+ -- Check each rule to see if its scope is loaded
+ for _, rule in ipairs(rules) do
+ if rule.type == 'regexp_rules' and rule.scope_name then
+ local scope_loaded = rspamd_config:is_regexp_scope_loaded(rule.scope_name)
+
+ if scope_loaded then
+ -- Mark all meta symbols for this rule as available (atoms are just expression parts)
+ local updated_count = 0
+ for _, state_info in pairs(regexp_rules_symbol_states) do
+ if state_info.type == 'meta' and state_info.rule_name == rule.symbol and state_info.state ~= 'available' then
+ state_info.state = 'available'
+ updated_count = updated_count + 1
+ end
+ end
+
+ lua_util.debugm(N, rspamd_config, 'Scope %s is loaded, marked %s symbols as available',
+ rule.scope_name, updated_count)
+ else
+ lua_util.debugm(N, rspamd_config, 'Scope %s is not loaded', rule.scope_name)
+ end
+ end
+ end
+
+ local stats = get_regexp_rules_symbol_stats()
+ lua_util.debugm(N, rspamd_config, 'Symbol state stats after sync: available=%s, loading=%s, orphaned=%s, total=%s',
+ stats.available, stats.loading, stats.orphaned, stats.total)
+end
+
+-- Optional cleanup function to remove old orphaned symbols (can be called periodically)
+local function cleanup_orphaned_regexp_rules_symbols(max_age_seconds)
+ max_age_seconds = max_age_seconds or 3600 -- Default to 1 hour
+ local current_time = os.time()
+ local removed = 0
+
+ for symbol, state_info in pairs(regexp_rules_symbol_states) do
+ if state_info.type == 'meta' and state_info.state == 'orphaned' and state_info.orphaned_at then
+ if (current_time - state_info.orphaned_at) > max_age_seconds then
+ regexp_rules_symbol_states[symbol] = nil
+ -- Only meta rules should be cleaned up from sa_meta_rules
+ sa_meta_rules[symbol] = nil
+ removed = removed + 1
+ lua_util.debugm(N, rspamd_config, 'cleaned up orphaned regexp_rules symbol: %s', symbol)
+ end
+ end
+ end
+
+ if removed > 0 then
+ lua_util.debugm(N, rspamd_config, 'cleaned up %s orphaned regexp_rules symbols', removed)
+ end
+
+ return removed
+end
+
local value_types = {
ip = {
get_value = function(ip)
@@ -531,7 +1142,7 @@ local function multimap_query_redis(key, task, value, callback)
false, -- is write
redis_map_cb, --callback
cmd, -- command
- srch -- arguments
+ srch -- arguments
)
end
@@ -631,7 +1242,6 @@ local function multimap_callback(task, rule)
else
task:insert_result(forced, symbol, score, tostring(opt))
end
-
else
task:insert_result(forced, symbol, score)
end
@@ -671,7 +1281,6 @@ local function multimap_callback(task, rule)
local fn = multimap_filters[r.type]
if fn then
-
local filtered_value = fn(task, r.filter, value, r)
lua_util.debugm(N, task, 'apply filter %s for rule %s: %s -> %s',
r.filter, r.symbol, value, filtered_value)
@@ -1097,6 +1706,12 @@ local function multimap_callback(task, rule)
end
end
end,
+ regexp_rules = function()
+ -- For regexp_rules, the meta rules are registered as separate symbols
+ -- This is just a placeholder callback
+ lua_util.debugm(N, task, 'Regexp rules callback for %s - meta rules are registered as separate symbols',
+ rule.symbol)
+ end,
}
local rt = rule.type
@@ -1184,7 +1799,8 @@ local function add_multimap_rule(key, newrule)
country = true,
mempool = true,
selector = true,
- combined = true
+ combined = true,
+ regexp_rules = true
}
if newrule['message_func'] then
@@ -1267,6 +1883,7 @@ local function add_multimap_rule(key, newrule)
{
rules = newrule.rules,
expression = newrule.expression,
+ description = newrule.description,
on_load = newrule.dynamic_symbols and multimap_on_load_gen(newrule) or nil,
}, N, 'Combined map for ' .. newrule.symbol)
if not newrule.combined then
@@ -1274,6 +1891,145 @@ local function add_multimap_rule(key, newrule)
else
ret = true
end
+ elseif newrule.type == 'regexp_rules' then
+ -- SpamAssassin-like map processing using callback map with line-by-line processing
+ local map_ucl = newrule.map
+ if type(map_ucl) == 'string' then
+ -- Convert string URL to UCL format
+ map_ucl = {
+ url = map_ucl,
+ description = newrule.description
+ }
+ elseif type(map_ucl) == 'table' and not map_ucl.url and not map_ucl.urls then
+ rspamd_logger.errx(rspamd_config, 'SA map %s has no URL defined', newrule.symbol)
+ return nil
+ end
+
+ -- Set scope name for this regexp_rules map
+ local scope_name = newrule.symbol
+ newrule.scope_name = scope_name
+
+ -- Remove existing scope if it exists to ensure clean state
+ if rspamd_config:find_regexp_scope(scope_name) then
+ lua_util.debugm(N, rspamd_config, 'removing existing regexp scope: %s', scope_name)
+ rspamd_config:remove_regexp_scope(scope_name)
+ end
+
+ -- Mark the scope as unloaded during map processing
+ -- The scope will be created automatically when first regexp is added
+ local first_line_processed = false
+
+ -- Create callback map with by_line processing
+ newrule.map_obj = rspamd_config:add_map({
+ type = "callback",
+ url = map_ucl.url or map_ucl.urls or map_ucl,
+ description = newrule.description or 'SA-style multimap: ' .. newrule.symbol,
+ callback = function(pseudo_key, pseudo_value)
+        -- The callback map parses each line as a key/value pair, but SA rules are free-form lines,
+        -- so re-join the parts and process them as a single line
+ local line = pseudo_key .. ' ' .. pseudo_value
+ -- Add debug logging to see if callback is called
+ lua_util.debugm(N, rspamd_config, 'regexp_rules callback called for line: %s', line)
+
+ -- Mark scope as unloaded on first line
+ if not first_line_processed then
+ first_line_processed = true
+ lua_util.debugm(N, rspamd_config, 'processing first line of regexp_rules map %s', newrule.symbol)
+
+ -- Mark all existing symbols for this scope as loading
+ for symbol, state_info in pairs(regexp_rules_symbol_states) do
+ if state_info.rule_name == newrule.symbol then
+ state_info.state = 'loading'
+ lua_util.debugm(N, rspamd_config, 'marked regexp_rules symbol %s as loading for scope %s reload',
+ symbol, scope_name)
+ end
+ end
+
+ -- Clear atoms and meta rules for this scope
+ local symbols_to_remove = {}
+ for symbol, _ in pairs(sa_meta_rules) do
+ if regexp_rules_symbol_states[symbol] and regexp_rules_symbol_states[symbol].rule_name == newrule.symbol then
+ table.insert(symbols_to_remove, symbol)
+ end
+ end
+
+ for _, symbol in ipairs(symbols_to_remove) do
+ sa_atoms[symbol] = nil
+ sa_meta_rules[symbol] = nil
+ lua_util.debugm(N, rspamd_config, 'cleared regexp_rules symbol %s for scope %s reload',
+ symbol, scope_name)
+ end
+
+ -- The scope will be created by process_sa_line when first regexp is added
+ -- We mark it as unloaded immediately after creation
+ rspamd_config:set_regexp_scope_loaded(scope_name, false)
+ lua_util.debugm(N, rspamd_config, 'marked regexp scope %s as unloaded during processing', scope_name)
+ end
+ process_sa_line(newrule, line)
+ end,
+ by_line = true, -- Process line by line
+ opaque_data = false, -- Use plain strings
+ })
+
+ -- Add on_load callback to mark scope as loaded when map processing is complete
+ if newrule.map_obj then
+ newrule.map_obj:on_load(function()
+ lua_util.debugm(N, rspamd_config, 'regexp_rules map %s loaded successfully', newrule.symbol)
+
+ -- Mark all meta symbols for this scope as available (atoms are just expression parts)
+ for symbol, state_info in pairs(regexp_rules_symbol_states) do
+ if state_info.type == 'meta' and state_info.rule_name == newrule.symbol then
+ if state_info.state == 'loading' then
+ -- Check if this meta symbol still exists in the rules
+ if sa_meta_rules[symbol] then
+ state_info.state = 'available'
+ lua_util.debugm(N, rspamd_config, 'marked regexp_rules symbol %s as available after map load', symbol)
+ else
+ -- Symbol was removed in the new map
+ state_info.state = 'orphaned'
+ state_info.orphaned_at = os.time()
+ lua_util.debugm(N, rspamd_config, 'marked regexp_rules symbol %s as orphaned after map load', symbol)
+ end
+ end
+ end
+ end
+
+ -- Mark scope as loaded when map processing is complete
+ -- Check if scope exists (it might not if map was empty)
+ if rspamd_config:find_regexp_scope(scope_name) then
+ rspamd_config:set_regexp_scope_loaded(scope_name, true)
+ lua_util.debugm(N, rspamd_config, 'marked regexp scope %s as loaded after map processing', scope_name)
+
+ -- Trigger hyperscan compilation for this updated scope
+ newrule.map_obj:trigger_hyperscan_compilation()
+ lua_util.debugm(N, rspamd_config, 'triggered hyperscan compilation for scope %s after map loading',
+ scope_name)
+ else
+ lua_util.debugm(N, rspamd_config, 'regexp scope %s not created (empty map)', scope_name)
+ end
+
+ -- Synchronize symbol states after map load to ensure all processes see correct states
+ sync_regexp_rules_symbol_states()
+
+ -- Finalize SA rules immediately after map load
+ finalize_sa_rules()
+
+ -- Promote symcache resort after dynamic symbol registration
+ rspamd_config:promote_symbols_cache_resort()
+ lua_util.debugm(N, rspamd_config, 'promoted symcache resort after loading SA rules from map %s',
+ newrule.symbol)
+ end)
+ end
+
+ if newrule.map_obj then
+ -- Mark this rule as using SA functionality
+ newrule.uses_sa = true
+ lua_util.debugm(N, rspamd_config, 'created regexp_rules map %s with scope: %s',
+ newrule.symbol, scope_name)
+ ret = true
+ else
+    rspamd_logger.warnx(rspamd_config, 'Cannot add SA-style rule: map doesn\'t exist: %s',
+ newrule['map'])
+ end
else
if newrule['type'] == 'ip' then
newrule.map_obj = lua_maps.map_add_from_ucl(newrule.map, 'radix',
@@ -1281,7 +2037,7 @@ local function add_multimap_rule(key, newrule)
if newrule.map_obj then
ret = true
else
- rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %1',
+      rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exist: %s',
newrule['map'])
end
elseif newrule['type'] == 'received' then
@@ -1302,7 +2058,7 @@ local function add_multimap_rule(key, newrule)
if newrule.map_obj then
ret = true
else
- rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %1',
+      rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exist: %s',
newrule['map'])
end
else
@@ -1311,12 +2067,11 @@ local function add_multimap_rule(key, newrule)
if newrule.map_obj then
ret = true
else
- rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %1',
+      rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exist: %s',
newrule['map'])
end
end
elseif known_generic_types[newrule.type] then
-
if newrule.filter == 'ip_addr' then
newrule.map_obj = lua_maps.map_add_from_ucl(newrule.map, 'radix',
newrule.description)
@@ -1327,11 +2082,14 @@ local function add_multimap_rule(key, newrule)
if newrule.map_obj then
ret = true
else
- rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exists: %1',
+      rspamd_logger.warnx(rspamd_config, 'Cannot add rule: map doesn\'t exist: %s',
newrule['map'])
end
elseif newrule['type'] == 'dnsbl' then
ret = true
+ else
+ rspamd_logger.errx(rspamd_config, 'cannot add rule %s: invalid type %s',
+ key, newrule['type'])
end
end
@@ -1389,6 +2147,29 @@ end
local opts = rspamd_config:get_all_opt(N)
if opts and type(opts) == 'table' then
redis_params = rspamd_parse_redis_server(N)
+
+ -- Initialize regexp_rules symbol states from existing sa_atoms and sa_meta_rules
+ -- This helps with module reload scenarios
+ for atom_name, _ in pairs(sa_atoms) do
+ if not regexp_rules_symbol_states[atom_name] then
+ regexp_rules_symbol_states[atom_name] = {
+ state = 'available',
+ rule_name = 'unknown',
+ type = 'atom'
+ }
+ end
+ end
+
+ for meta_name, meta_rule in pairs(sa_meta_rules) do
+ if not regexp_rules_symbol_states[meta_name] then
+ regexp_rules_symbol_states[meta_name] = {
+ state = 'available',
+ rule_name = meta_rule.rule_name or 'unknown',
+ type = 'meta'
+ }
+ end
+ end
+
for k, m in pairs(opts) do
if type(m) == 'table' and m['type'] then
local rule = add_multimap_rule(k, m)
@@ -1461,5 +2242,29 @@ if opts and type(opts) == 'table' then
if #rules == 0 then
lua_util.disable_module(N, "config")
+ else
+ -- Finalize SpamAssassin-like rules after all maps are processed
+ local has_sa_rules = false
+ for _, rule in ipairs(rules) do
+ if rule.uses_sa then
+ has_sa_rules = true
+ break
+ end
+ end
+
+ if has_sa_rules then
+ -- Add a callback to synchronize symbol states in worker processes
+ rspamd_config:add_on_load(function(cfg, ev_base, worker)
+ -- Synchronize symbol states with loaded scopes in worker processes
+ if worker then
+ sync_regexp_rules_symbol_states()
+ end
+ end)
+
+ -- Export utility functions for debugging/monitoring
+ rspamd_plugins.multimap = rspamd_plugins.multimap or {}
+ rspamd_plugins.multimap.get_regexp_rules_symbol_stats = get_regexp_rules_symbol_stats
+ rspamd_plugins.multimap.cleanup_orphaned_regexp_rules_symbols = cleanup_orphaned_regexp_rules_symbols
+ end
end
end
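A sketch of how the new regexp_rules map type could be wired up. The configuration keys mirror what add_multimap_rule reads (type, map, description) and the rule file uses the line formats handled by process_sa_line; all names, the URL and the scores below are illustrative:

# local.d/multimap.conf (hypothetical rule)
SA_STYLE_RULES {
  type = "regexp_rules";
  map = "https://maps.example.com/sa_rules.map";
  description = "SA-style rules loaded via multimap";
}

# sa_rules.map - the subset of SpamAssassin syntax understood by process_sa_line
header   __X_MAILER_OE    X-Mailer =~ /Outlook Express/i
body     __CLICK_HERE     /click here to claim/i
meta     SUSPICIOUS_COMBO (__X_MAILER_OE & __CLICK_HERE)
score    SUSPICIOUS_COMBO 4.5
describe SUSPICIOUS_COMBO Forged-looking mailer combined with a call-to-action phrase

Only the meta symbols are registered in the symbol cache (with their score and describe text); the __ atoms exist purely as parts of meta expressions, and every regexp is registered in a per-rule scope named after the rule symbol so that a map reload can replace it cleanly.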
diff --git a/src/plugins/lua/phishing.lua b/src/plugins/lua/phishing.lua
index 3f5c9e634..4dc3fd924 100644
--- a/src/plugins/lua/phishing.lua
+++ b/src/plugins/lua/phishing.lua
@@ -39,7 +39,7 @@ local anchor_exceptions_maps = {}
local strict_domains_maps = {}
local phishing_feed_exclusion_map = nil
local generic_service_map = nil
-local openphish_map = 'https://www.openphish.com/feed.txt'
+local openphish_map = 'https://raw.githubusercontent.com/openphish/public_feed/refs/heads/main/feed.txt'
local phishtank_suffix = 'phishtank.rspamd.com'
-- Not enabled by default as their feed is quite large
local openphish_premium = false
diff --git a/src/plugins/lua/ratelimit.lua b/src/plugins/lua/ratelimit.lua
index c20e61b17..d463658fa 100644
--- a/src/plugins/lua/ratelimit.lua
+++ b/src/plugins/lua/ratelimit.lua
@@ -373,7 +373,7 @@ local function ratelimit_cb(task)
local function gen_check_cb(prefix, bucket, lim_name, lim_key)
return function(err, data)
if err then
- rspamd_logger.errx('cannot check limit %s: %s %s', prefix, err, data)
+ rspamd_logger.errx('cannot check limit %s: %s', prefix, err)
elseif type(data) == 'table' and data[1] then
lua_util.debugm(N, task,
"got reply for limit %s (%s / %s); %s burst, %s:%s dyn, %s leaked",
@@ -416,7 +416,7 @@ local function ratelimit_cb(task)
task:set_pre_result('soft reject',
message_func(task, lim_name, prefix, bucket, lim_key), N)
else
- task:set_pre_result('soft reject', bucket.message)
+ task:set_pre_result('soft reject', bucket.message, N)
end
end
end
@@ -476,7 +476,7 @@ local function maybe_cleanup_pending(task)
local bucket = v.bucket
local function cleanup_cb(err, data)
if err then
- rspamd_logger.errx('cannot cleanup limit %s: %s %s', k, err, data)
+ rspamd_logger.errx('cannot cleanup limit %s: %s', k, err)
else
lua_util.debugm(N, task, 'cleaned pending bucked for %s: %s', k, data)
end
diff --git a/src/plugins/lua/rbl.lua b/src/plugins/lua/rbl.lua
index 76c84f85d..b5b904b00 100644
--- a/src/plugins/lua/rbl.lua
+++ b/src/plugins/lua/rbl.lua
@@ -40,6 +40,7 @@ local N = 'rbl'
-- Checks that could be performed by rbl module
local local_exclusions
+local disabled_rbl_suffixes -- Map of disabled rbl suffixes
local white_symbols = {}
local black_symbols = {}
local monitored_addresses = {}
@@ -220,7 +221,9 @@ matchers.radix = function(_, _, real_ip, map)
end
matchers.equality = function(codes, to_match)
- if type(codes) ~= 'table' then return codes == to_match end
+ if type(codes) ~= 'table' then
+ return codes == to_match
+ end
for _, ip in ipairs(codes) do
if to_match == ip then
return true
@@ -470,6 +473,17 @@ local function gen_rbl_callback(rule)
return true
end
+ local function is_allowed(task, _)
+ if disabled_rbl_suffixes then
+ if disabled_rbl_suffixes:get_key(rule.rbl) then
+ lua_util.debugm(N, task, 'skip rbl check: %s; disabled by suffix', rule.rbl)
+ return false
+ end
+ end
+
+ return true
+ end
+
local function check_required_symbols(task, _)
if rule.require_symbols then
return fun.all(function(sym)
@@ -596,7 +610,7 @@ local function gen_rbl_callback(rule)
ignore_ip = rule.no_ip,
need_images = rule.images,
need_emails = false,
- need_content = rule.content_urls or false,
+ need_content = rule.content_urls,
esld_limit = esld_lim,
no_cache = true,
}
@@ -698,9 +712,9 @@ local function gen_rbl_callback(rule)
requests_table, 'received',
whitelist)
else
- lua_util.debugm(N, task, 'rbl %s; skip check_received for %s:' ..
- 'Received IP same as From IP and will be checked only in check_from function',
- rule.symbol, rh.real_ip)
+          lua_util.debugm(N, task, 'rbl %s; skip check_received for %s: ' ..
+ 'Received IP same as From IP and will be checked only in check_from function',
+ rule.symbol, rh.real_ip)
end
end
end
@@ -838,6 +852,7 @@ local function gen_rbl_callback(rule)
-- Create function pipeline depending on rbl settings
local pipeline = {
+ is_allowed, -- check if rbl is allowed
is_alive, -- check monitored status
check_required_symbols -- if we have require_symbols then check those symbols
}
@@ -983,7 +998,7 @@ local function gen_rbl_callback(rule)
if req.resolve_ip then
-- Deal with both ipv4 and ipv6
-- Resolve names first
- if r:resolve_a({
+ if (rule.ipv4 == nil or rule.ipv4) and r:resolve_a({
task = task,
name = req.n,
callback = gen_rbl_ip_dns_callback(req),
@@ -991,7 +1006,7 @@ local function gen_rbl_callback(rule)
}) then
nresolved = nresolved + 1
end
- if r:resolve('aaaa', {
+ if (rule.ipv6 == nil or rule.ipv6) and r:resolve('aaaa', {
task = task,
name = req.n,
callback = gen_rbl_ip_dns_callback(req),
@@ -1062,7 +1077,7 @@ local function add_rbl(key, rbl, global_opts)
rbl.selector_flatten)
if not sel then
- rspamd_logger.errx('invalid selector for rbl rule %s: %s', key, selector)
+ rspamd_logger.errx(rspamd_config, 'invalid selector for rbl rule %s: %s', key, selector)
return false
end
@@ -1108,9 +1123,10 @@ local function add_rbl(key, rbl, global_opts)
end
for label, v in pairs(rbl.returncodes) do
if type(v) ~= 'table' then
- v = {v}
+ v = { v }
end
- rbl.returncodes_maps[label] = lua_maps.map_add_from_ucl(v, match_type, string.format('%s_%s RBL returncodes', label, rbl.symbol))
+ rbl.returncodes_maps[label] = lua_maps.map_add_from_ucl(v, match_type,
+ string.format('%s_%s RBL returncodes', label, rbl.symbol))
end
end
@@ -1319,6 +1335,11 @@ if type(opts.attached_maps) == 'table' then
end
end
+if opts.disabled_rbl_suffixes_map then
+ disabled_rbl_suffixes = lua_maps.map_add_from_ucl(opts.disabled_rbl_suffixes_map, 'set',
+ 'Disabled suffixes for RBL')
+end
+
for key, rbl in pairs(opts.rbls) do
if type(rbl) ~= 'table' or rbl.disabled == true or rbl.enabled == false then
rspamd_logger.infox(rspamd_config, 'disable rbl "%s"', key)
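The rbl changes above add two knobs: a disabled_rbl_suffixes_map set, checked by the new is_allowed pipeline stage to skip whole RBLs by suffix, and per-rule ipv4/ipv6 flags that gate the A/AAAA lookups when resolve_ip is used. A standalone sketch of the gating logic (illustrative only, mirroring the "rule.ipv4 == nil or rule.ipv4" test):

    -- unset means "resolve"; only an explicit false disables the lookup
    local function want_ipv4(rule)
      return rule.ipv4 == nil or rule.ipv4
    end
    assert(want_ipv4({}))                   -- default: A records are resolved
    assert(want_ipv4({ ipv4 = true }))
    assert(not want_ipv4({ ipv4 = false })) -- explicit opt-out skips A lookups
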
diff --git a/src/plugins/lua/replies.lua b/src/plugins/lua/replies.lua
index 08fb68bc7..2f0153d00 100644
--- a/src/plugins/lua/replies.lua
+++ b/src/plugins/lua/replies.lua
@@ -79,8 +79,8 @@ local function configure_redis_scripts(_, _)
end
]]
local set_script_zadd_global = lua_util.jinja_template(redis_script_zadd_global,
- { max_global_size = settings.max_global_size })
- global_replies_set_script = lua_redis.add_redis_script(set_script_zadd_global, redis_params)
+ { max_global_size = settings.max_global_size })
+ global_replies_set_script = lua_redis.add_redis_script(set_script_zadd_global, redis_params)
local redis_script_zadd_local = [[
redis.call('ZREMRANGEBYRANK', KEYS[1], 0, -({= max_local_size =} + 1)) -- keeping size of local replies set
@@ -102,7 +102,7 @@ local function configure_redis_scripts(_, _)
end
]]
local set_script_zadd_local = lua_util.jinja_template(redis_script_zadd_local,
- { expire_time = settings.expire, max_local_size = settings.max_local_size })
+ { expire_time = settings.expire, max_local_size = settings.max_local_size })
local_replies_set_script = lua_redis.add_redis_script(set_script_zadd_local, redis_params)
end
@@ -110,7 +110,7 @@ local function replies_check(task)
local in_reply_to
local function check_recipient(stored_rcpt)
- local rcpts = task:get_recipients('mime')
+ local rcpts = task:get_recipients('smtp')
lua_util.debugm(N, task, 'recipients: %s', rcpts)
if rcpts then
local filter_predicate = function(input_rcpt)
@@ -119,7 +119,7 @@ local function replies_check(task)
return real_rcpt_h == stored_rcpt
end
- if fun.any(filter_predicate, fun.map(function(rcpt)
+ if fun.all(filter_predicate, fun.map(function(rcpt)
return rcpt.addr or ''
end, rcpts)) then
lua_util.debugm(N, task, 'reply to %s validated', in_reply_to)
@@ -155,9 +155,9 @@ local function replies_check(task)
end
lua_redis.exec_redis_script(global_replies_set_script,
- { task = task, is_write = true },
- zadd_global_set_cb,
- { global_key }, params)
+ { task = task, is_write = true },
+ zadd_global_set_cb,
+ { global_key }, params)
end
local function add_to_replies_set(recipients)
@@ -173,7 +173,7 @@ local function replies_check(task)
local params = recipients
lua_util.debugm(N, task,
- 'Adding recipients %s to sender %s local replies set', recipients, sender_key)
+ 'Adding recipients %s to sender %s local replies set', recipients, sender_key)
local function zadd_cb(err, _)
if err ~= nil then
@@ -189,9 +189,9 @@ local function replies_check(task)
table.insert(params, 1, task_time_str)
lua_redis.exec_redis_script(local_replies_set_script,
- { task = task, is_write = true },
- zadd_cb,
- { sender_key }, params)
+ { task = task, is_write = true },
+ zadd_cb,
+ { sender_key }, params)
end
local function redis_get_cb(err, data, addr)
@@ -387,7 +387,7 @@ if opts then
end
lua_redis.register_prefix(settings.sender_prefix, N,
- 'Prefix to identify replies sets')
+ 'Prefix to identify replies sets')
local id = rspamd_config:register_symbol({
name = 'REPLIES_CHECK',
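The check_recipient change above switches the match from fun.any to fun.all over the SMTP recipients, so a message is only treated as a reply when every recipient matches the stored one. A short sketch of the difference, using the lua-fun module bundled with rspamd:

    local fun = require "fun"
    local stored_rcpt = 'alice@example.com'
    local rcpts = { 'alice@example.com', 'bob@example.com' }
    local function matches(addr)
      return addr == stored_rcpt
    end
    assert(fun.any(matches, rcpts))     -- old behaviour: one matching recipient was enough
    assert(not fun.all(matches, rcpts)) -- new behaviour: every recipient must match
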
diff --git a/src/plugins/lua/reputation.lua b/src/plugins/lua/reputation.lua
index bd7d91932..eacaee064 100644
--- a/src/plugins/lua/reputation.lua
+++ b/src/plugins/lua/reputation.lua
@@ -200,7 +200,9 @@ local function dkim_reputation_filter(task, rule)
end
end
- if sel_tld and requests[sel_tld] then
+ if rule.selector.config.exclusion_map and sel_tld and rule.selector.config.exclusion_map:get_key(sel_tld) then
+ lua_util.debugm(N, task, 'DKIM domain %s is excluded from reputation scoring', sel_tld)
+ elseif sel_tld and requests[sel_tld] then
if requests[sel_tld] == 'a' then
rep_accepted = rep_accepted + generic_reputation_calc(v, rule, 1.0, task)
end
@@ -243,9 +245,13 @@ local function dkim_reputation_idempotent(task, rule)
if sc then
for dom, res in pairs(requests) do
- -- tld + "." + check_result, e.g. example.com.+ - reputation for valid sigs
- local query = string.format('%s.%s', dom, res)
- rule.backend.set_token(task, rule, nil, query, sc)
+ if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(dom) then
+ lua_util.debugm(N, task, 'DKIM domain %s is excluded from reputation update', dom)
+ else
+ -- tld + "." + check_result, e.g. example.com.+ - reputation for valid sigs
+ local query = string.format('%s.%s', dom, res)
+ rule.backend.set_token(task, rule, nil, query, sc)
+ end
end
end
end
@@ -277,6 +283,7 @@ local dkim_selector = {
outbound = true,
inbound = true,
max_accept_adjustment = 2.0, -- How to adjust accepted DKIM score
+ exclusion_map = nil
},
dependencies = { "DKIM_TRACE" },
filter = dkim_reputation_filter, -- used to get scores
@@ -356,10 +363,14 @@ local function url_reputation_filter(task, rule)
for i, res in pairs(results) do
local req = requests[i]
if req then
- local url_score = generic_reputation_calc(res, rule,
- req[2] / mhits, task)
- lua_util.debugm(N, task, "score for url %s is %s, score=%s", req[1], url_score, score)
- score = score + url_score
+ if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(req[1]) then
+ lua_util.debugm(N, task, 'URL domain %s is excluded from reputation scoring', req[1])
+ else
+ local url_score = generic_reputation_calc(res, rule,
+ req[2] / mhits, task)
+ lua_util.debugm(N, task, "score for url %s is %s, score=%s", req[1], url_score, score)
+ score = score + url_score
+ end
end
end
@@ -386,7 +397,11 @@ local function url_reputation_idempotent(task, rule)
if sc then
for _, tld in ipairs(requests) do
- rule.backend.set_token(task, rule, nil, tld[1], sc)
+ if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(tld[1]) then
+ lua_util.debugm(N, task, 'URL domain %s is excluded from reputation update', tld[1])
+ else
+ rule.backend.set_token(task, rule, nil, tld[1], sc)
+ end
end
end
end
@@ -401,6 +416,7 @@ local url_selector = {
check_from = true,
outbound = true,
inbound = true,
+ exclusion_map = nil
},
filter = url_reputation_filter, -- used to get scores
idempotent = url_reputation_idempotent -- used to set scores
@@ -439,6 +455,11 @@ local function ip_reputation_filter(task, rule)
ip = ip:apply_mask(cfg.ipv6_mask)
end
+ if cfg.exclusion_map and cfg.exclusion_map:get_key(ip) then
+ lua_util.debugm(N, task, 'IP %s is excluded from reputation scoring', tostring(ip))
+ return
+ end
+
local pool = task:get_mempool()
local asn = pool:get_variable("asn")
local country = pool:get_variable("country")
@@ -554,6 +575,11 @@ local function ip_reputation_idempotent(task, rule)
ip = ip:apply_mask(cfg.ipv6_mask)
end
+ if cfg.exclusion_map and cfg.exclusion_map:get_key(ip) then
+ lua_util.debugm(N, task, 'IP %s is excluded from reputation update', tostring(ip))
+ return
+ end
+
local pool = task:get_mempool()
local asn = pool:get_variable("asn")
local country = pool:get_variable("country")
@@ -600,6 +626,7 @@ local ip_selector = {
inbound = true,
ipv4_mask = 32, -- Mask bits for ipv4
ipv6_mask = 64, -- Mask bits for ipv6
+ exclusion_map = nil
},
--dependencies = {"ASN"}, -- ASN is a prefilter now...
init = ip_reputation_init,
@@ -621,6 +648,11 @@ local function spf_reputation_filter(task, rule)
local cr = require "rspamd_cryptobox_hash"
local hkey = cr.create(spf_record):base32():sub(1, 32)
+ if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(hkey) then
+ lua_util.debugm(N, task, 'SPF record %s is excluded from reputation scoring', hkey)
+ return
+ end
+
lua_util.debugm(N, task, 'check spf record %s -> %s', spf_record, hkey)
local function tokens_cb(err, token, values)
@@ -649,6 +681,11 @@ local function spf_reputation_idempotent(task, rule)
local cr = require "rspamd_cryptobox_hash"
local hkey = cr.create(spf_record):base32():sub(1, 32)
+ if rule.selector.config.exclusion_map and rule.selector.config.exclusion_map:get_key(hkey) then
+ lua_util.debugm(N, task, 'SPF record %s is excluded from reputation update', hkey)
+ return
+ end
+
lua_util.debugm(N, task, 'set spf record %s -> %s = %s',
spf_record, hkey, sc)
rule.backend.set_token(task, rule, nil, hkey, sc)
@@ -663,6 +700,7 @@ local spf_selector = {
max_score = nil,
outbound = true,
inbound = true,
+ exclusion_map = nil
},
dependencies = { "R_SPF_ALLOW" },
filter = spf_reputation_filter, -- used to get scores
@@ -697,6 +735,13 @@ local function generic_reputation_init(rule)
'Whitelisted selectors')
end
+ if cfg.exclusion_map then
+ cfg.exclusion_map = lua_maps.map_add('reputation',
+ 'generic_exclusion',
+ 'set',
+ 'Excluded selectors')
+ end
+
return true
end
@@ -706,6 +751,10 @@ local function generic_reputation_filter(task, rule)
local function tokens_cb(err, token, values)
if values then
+ if cfg.exclusion_map and cfg.exclusion_map:get_key(token) then
+ lua_util.debugm(N, task, 'Generic selector token %s is excluded from reputation scoring', token)
+ return
+ end
local score = generic_reputation_calc(values, rule, 1.0, task)
if math.abs(score) > 1e-3 then
@@ -742,14 +791,22 @@ local function generic_reputation_idempotent(task, rule)
if sc then
if type(selector_res) == 'table' then
fun.each(function(e)
- lua_util.debugm(N, task, 'set generic selector (%s) %s = %s',
- rule['symbol'], e, sc)
- rule.backend.set_token(task, rule, nil, e, sc)
+ if cfg.exclusion_map and cfg.exclusion_map:get_key(e) then
+ lua_util.debugm(N, task, 'Generic selector token %s is excluded from reputation update', e)
+ else
+ lua_util.debugm(N, task, 'set generic selector (%s) %s = %s',
+ rule['symbol'], e, sc)
+ rule.backend.set_token(task, rule, nil, e, sc)
+ end
end, selector_res)
else
- lua_util.debugm(N, task, 'set generic selector (%s) %s = %s',
- rule['symbol'], selector_res, sc)
- rule.backend.set_token(task, rule, nil, selector_res, sc)
+ if cfg.exclusion_map and cfg.exclusion_map:get_key(selector_res) then
+ lua_util.debugm(N, task, 'Generic selector token %s is excluded from reputation update', selector_res)
+ else
+ lua_util.debugm(N, task, 'set generic selector (%s) %s = %s',
+ rule['symbol'], selector_res, sc)
+ rule.backend.set_token(task, rule, nil, selector_res, sc)
+ end
end
end
end
@@ -764,6 +821,7 @@ local generic_selector = {
selector = ts.string,
delimiter = ts.string,
whitelist = ts.one_of(lua_maps.map_schema, lua_maps_exprs.schema):is_optional(),
+ exclusion_map = ts.one_of(lua_maps.map_schema, lua_maps_exprs.schema):is_optional()
},
config = {
lower_bound = 10, -- minimum number of messages to be scored
@@ -773,7 +831,8 @@ local generic_selector = {
inbound = true,
selector = nil,
delimiter = ':',
- whitelist = nil
+ whitelist = nil,
+ exclusion_map = nil
},
init = generic_reputation_init,
filter = generic_reputation_filter, -- used to get scores
@@ -1107,7 +1166,7 @@ local backends = {
name = '1m',
mult = 1.0,
}
- }, -- What buckets should be used, default 1h and 1month
+ }, -- What buckets should be used, default 1month
},
init = reputation_redis_init,
get_token = reputation_redis_get_token,
@@ -1267,6 +1326,24 @@ local function parse_rule(name, tbl)
end
end
+ -- Parse exclusion_map for reputation exclusion lists
+ if rule.config.exclusion_map then
+ local map_type = 'set' -- Default to set for string-based selectors (dkim, url, spf, generic)
+ if sel_type == 'ip' or sel_type == 'sender' then
+ map_type = 'radix' -- Use radix for IP-based selectors
+ end
+ local map = lua_maps.map_add_from_ucl(rule.config.exclusion_map,
+ map_type,
+ sel_type .. ' reputation exclusion map')
+ if not map then
+ rspamd_logger.errx(rspamd_config, "cannot parse exclusion map config for %s: (%s)",
+ sel_type,
+ rule.config.exclusion_map)
+ return false
+ end
+ rule.config.exclusion_map = map
+ end
+
local symbol = rule.selector.config.symbol or name
if tbl.symbol then
symbol = tbl.symbol
@@ -1387,4 +1464,4 @@ if opts['rules'] then
end
else
lua_util.disable_module(N, "config")
-end
+end
\ No newline at end of file
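Every selector in the reputation hunks above gains an optional exclusion_map ('set' for the dkim/url/spf/generic selectors, 'radix' for ip); entries found there are skipped both when reading reputation and when writing tokens back. A rough sketch of the resulting flow for the ip selector, assuming cfg.exclusion_map has already been compiled into a radix map by parse_rule:

    -- illustrative only; masks are applied first, so exclusions match the masked network
    local function should_score_ip(cfg, ip)
      if ip:get_version() == 4 and cfg.ipv4_mask then
        ip = ip:apply_mask(cfg.ipv4_mask)   -- /32 by default
      elseif cfg.ipv6_mask then
        ip = ip:apply_mask(cfg.ipv6_mask)   -- /64 by default
      end
      if cfg.exclusion_map and cfg.exclusion_map:get_key(ip) then
        return false                        -- excluded: no scoring, no token update
      end
      return true
    end
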
diff --git a/src/plugins/lua/settings.lua b/src/plugins/lua/settings.lua
index 69d31d301..c576e1325 100644
--- a/src/plugins/lua/settings.lua
+++ b/src/plugins/lua/settings.lua
@@ -248,17 +248,13 @@ local function check_string_setting(expected, str)
end
local function check_ip_setting(expected, ip)
- if not expected[2] then
- if lua_maps.rspamd_maybe_check_map(expected[1], ip:to_string()) then
+ if type(expected) == "string" then
+ if lua_maps.rspamd_maybe_check_map(expected, ip:to_string()) then
return true
end
else
- if expected[2] ~= 0 then
- local nip = ip:apply_mask(expected[2])
- if nip and nip:to_string() == expected[1] then
- return true
- end
- elseif ip:to_string() == expected[1] then
+ local nip = ip:apply_mask(expected[2])
+ if nip and nip:to_string() == expected[1] then
return true
end
end
@@ -464,44 +460,52 @@ local function gen_settings_external_cb(name)
end
-- Process IP address: converted to a table {ip, mask}
-local function process_ip_condition(ip)
- local out = {}
-
+local function process_ip_condition(ip, out)
if type(ip) == "table" then
for _, v in ipairs(ip) do
- table.insert(out, process_ip_condition(v))
+ process_ip_condition(v, out)
end
- elseif type(ip) == "string" then
- local slash = string.find(ip, '/')
+ return
+ end
- if not slash then
- -- Just a plain IP address
- local res = rspamd_ip.from_string(ip)
+ if type(ip) == "string" then
+ if string.sub(ip, 1, 4) == "map:" then
+ -- It is a map, don't apply any extra logic
+ table.insert(out, ip)
+ return
+ end
- if res:is_valid() then
- out[1] = res:to_string()
- out[2] = 0
- else
- -- It can still be a map
- out[1] = ip
- end
- else
- local res = rspamd_ip.from_string(string.sub(ip, 1, slash - 1))
- local mask = tonumber(string.sub(ip, slash + 1))
+ local mask
+ local slash = string.find(ip, '/')
+ if slash then
+ mask = string.sub(ip, slash + 1)
+ ip = string.sub(ip, 1, slash - 1)
+ end
+
+ local res = rspamd_ip.from_string(ip)
+ if res:is_valid() then
+ if mask then
+ local mask_num = tonumber(mask)
+ if mask_num then
+ -- normalize IP
+ res = res:apply_mask(mask_num)
+ if res:is_valid() then
+ table.insert(out, { res:to_string(), mask_num })
+ return
+ end
+ end
- if res:is_valid() then
- out[1] = res:to_string()
- out[2] = mask
- else
- rspamd_logger.errx(rspamd_config, "bad IP address: " .. ip)
- return nil
+ rspamd_logger.errx(rspamd_config, "bad IP mask: %s/%s", ip, mask)
+ return
end
+
+ -- Just a plain IP address
+ table.insert(out, res:to_string())
+ return
end
- else
- return nil
end
- return out
+ rspamd_logger.errx(rspamd_config, "bad IP address: " .. ip)
end
-- Process email like condition, converted to a table with fields:
@@ -613,6 +617,12 @@ end
-- Used to create a checking closure: if value matches expected somehow, return true
local function gen_check_closure(expected, check_func)
+ if not check_func then
+ check_func = function(a, b)
+ return a == b
+ end
+ end
+
return function(value)
if not value then
return false
@@ -623,13 +633,6 @@ local function gen_check_closure(expected, check_func)
end
if value then
-
- if not check_func then
- check_func = function(a, b)
- return a == b
- end
- end
-
local ret
if type(expected) == 'table' then
ret = fun.any(function(d)
@@ -659,22 +662,21 @@ local function process_settings_table(tbl, allow_ids, mempool, is_static)
local checks = {}
if elt.ip then
- local ips_table = process_ip_condition(elt['ip'])
+ local ips_table = {}
+ process_ip_condition(elt.ip, ips_table)
- if ips_table then
- lua_util.debugm(N, rspamd_config, 'added ip condition to "%s": %s',
- name, ips_table)
- checks.ip = {
- check = gen_check_closure(convert_to_table(elt.ip, ips_table), check_ip_setting),
- extract = function(task)
- local ip = task:get_from_ip()
- if ip and ip:is_valid() then
- return ip
- end
- return nil
- end,
- }
- end
+ lua_util.debugm(N, rspamd_config, 'added ip condition to "%s": %s',
+ name, ips_table)
+ checks.ip = {
+ check = gen_check_closure(ips_table, check_ip_setting),
+ extract = function(task)
+ local ip = task:get_from_ip()
+ if ip and ip:is_valid() then
+ return ip
+ end
+ return nil
+ end,
+ }
end
if elt.ip_map then
local ips_map = lua_maps.map_add_from_ucl(elt.ip_map, 'radix',
@@ -697,23 +699,21 @@ local function process_settings_table(tbl, allow_ids, mempool, is_static)
end
if elt.client_ip then
- local client_ips_table = process_ip_condition(elt.client_ip)
-
- if client_ips_table then
- lua_util.debugm(N, rspamd_config, 'added client_ip condition to "%s": %s',
- name, client_ips_table)
- checks.client_ip = {
- check = gen_check_closure(convert_to_table(elt.client_ip, client_ips_table),
- check_ip_setting),
- extract = function(task)
- local ip = task:get_client_ip()
- if ip:is_valid() then
- return ip
- end
- return nil
- end,
- }
- end
+ local client_ips_table = {}
+ process_ip_condition(elt.client_ip, client_ips_table)
+
+ lua_util.debugm(N, rspamd_config, 'added client_ip condition to "%s": %s',
+ name, client_ips_table)
+ checks.client_ip = {
+ check = gen_check_closure(client_ips_table, check_ip_setting),
+ extract = function(task)
+ local ip = task:get_client_ip()
+ if ip:is_valid() then
+ return ip
+ end
+ return nil
+ end,
+ }
end
if elt.client_ip_map then
local ips_map = lua_maps.map_add_from_ucl(elt.ip_map, 'radix',
@@ -1275,7 +1275,7 @@ local function gen_redis_callback(handler, id)
ucl_err)
else
local obj = parser:get_object()
- rspamd_logger.infox(task, "<%1> apply settings according to redis rule %2",
+ rspamd_logger.infox(task, "<%s> apply settings according to redis rule %s",
task:get_message_id(), id)
apply_settings(task, obj, nil, 'redis')
break
@@ -1283,7 +1283,7 @@ local function gen_redis_callback(handler, id)
end
end
elseif err then
- rspamd_logger.errx(task, 'Redis error: %1', err)
+ rspamd_logger.errx(task, 'Redis error: %s', err)
end
end
@@ -1371,7 +1371,7 @@ if set_section and set_section[1] and type(set_section[1]) == "string" then
opaque_data = true
}
if not rspamd_config:add_map(map_attrs) then
- rspamd_logger.errx(rspamd_config, 'cannot load settings from %1', set_section)
+ rspamd_logger.errx(rspamd_config, 'cannot load settings from %s', set_section)
end
elseif set_section and type(set_section) == "table" then
settings_map_pool = rspamd_mempool.create()
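After the rewrite above, process_ip_condition appends directly into a caller-supplied table and can emit three kinds of entries, which check_ip_setting then tells apart by type. An illustrative example (addresses and map path are made up):

    local out = {}
    process_ip_condition({ '192.0.2.10', '10.0.0.0/8', 'map:file:///etc/rspamd/trusted.list' }, out)
    -- out now holds, in order:
    --   '192.0.2.10'                           -- plain address, kept as a string
    --   { '10.0.0.0', 8 }                      -- CIDR, normalized and stored as { ip, mask }
    --   'map:file:///etc/rspamd/trusted.list'  -- map reference, passed through untouched
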
diff --git a/src/plugins/lua/spamassassin.lua b/src/plugins/lua/spamassassin.lua
index 3ea794495..c03481de2 100644
--- a/src/plugins/lua/spamassassin.lua
+++ b/src/plugins/lua/spamassassin.lua
@@ -221,7 +221,7 @@ local function handle_header_def(hline, cur_rule)
})
cur_rule['function'] = function(task)
if not re then
- rspamd_logger.errx(task, 're is missing for rule %1', h)
+ rspamd_logger.errx(task, 're is missing for rule %s', h)
return 0
end
@@ -272,7 +272,7 @@ local function handle_header_def(hline, cur_rule)
elseif func == 'case' then
cur_param['strong'] = true
else
- rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
+ rspamd_logger.warnx(rspamd_config, 'Function %s is not supported in %s',
func, cur_rule['symbol'])
end
end, fun.tail(args))
@@ -314,7 +314,7 @@ end
local function freemail_search(input)
local res = 0
local function trie_callback(number, pos)
- lua_util.debugm(N, rspamd_config, 'Matched pattern %1 at pos %2', freemail_domains[number], pos)
+ lua_util.debugm(N, rspamd_config, 'Matched pattern %s at pos %s', freemail_domains[number], pos)
res = res + 1
end
@@ -369,7 +369,7 @@ local function gen_eval_rule(arg)
end
return 0
else
- rspamd_logger.infox(rspamd_config, 'cannot create regexp %1', re)
+ rspamd_logger.infox(rspamd_config, 'cannot create regexp %s', re)
return 0
end
end
@@ -461,7 +461,7 @@ local function gen_eval_rule(arg)
end
end
else
- rspamd_logger.infox(task, 'unimplemented mime check %1', arg)
+ rspamd_logger.infox(task, 'unimplemented mime check %s', arg)
end
end
@@ -576,7 +576,7 @@ local function maybe_parse_sa_function(line)
local elts = split(line, '[^:]+')
arg = elts[2]
- lua_util.debugm(N, rspamd_config, 'trying to parse SA function %1 with args %2',
+ lua_util.debugm(N, rspamd_config, 'trying to parse SA function %s with args %s',
elts[1], elts[2])
local substitutions = {
{ '^exists:',
@@ -612,7 +612,7 @@ local function maybe_parse_sa_function(line)
end
if not func then
- rspamd_logger.errx(task, 'cannot find appropriate eval rule for function %1',
+ rspamd_logger.errx(task, 'cannot find appropriate eval rule for function %s',
arg)
else
return func(task)
@@ -685,7 +685,7 @@ local function process_sa_conf(f)
end
-- We have previous rule valid
if not cur_rule['symbol'] then
- rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', cur_rule)
+ rspamd_logger.errx(rspamd_config, 'bad rule definition: %s', cur_rule)
end
rules[cur_rule['symbol']] = cur_rule
cur_rule = {}
@@ -695,15 +695,15 @@ local function process_sa_conf(f)
local function parse_score(words)
if #words == 3 then
-- score rule <x>
- lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[3])
+ lua_util.debugm(N, rspamd_config, 'found score for %s: %s', words[2], words[3])
return tonumber(words[3])
elseif #words == 6 then
-- score rule <x1> <x2> <x3> <x4>
-- we assume here that bayes and network are enabled and select <x4>
- lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[6])
+ lua_util.debugm(N, rspamd_config, 'found score for %s: %s', words[2], words[6])
return tonumber(words[6])
else
- rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
+ rspamd_logger.errx(rspamd_config, 'invalid score for %s', words[2])
end
return 0
@@ -812,7 +812,7 @@ local function process_sa_conf(f)
cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
if not cur_rule['re'] then
- rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
+ rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%s' for %s",
cur_rule['re_expr'], cur_rule['symbol'])
else
cur_rule['re']:set_max_hits(1)
@@ -829,8 +829,8 @@ local function process_sa_conf(f)
cur_rule['mime'] = false
end
- if cur_rule['re'] and cur_rule['symbol'] and
- (cur_rule['header'] or cur_rule['function']) then
+ if cur_rule['re'] and cur_rule['symbol']
+ and (cur_rule['header'] or cur_rule['function']) then
valid_rule = true
cur_rule['re']:set_max_hits(1)
if cur_rule['header'] and cur_rule['ordinary'] then
@@ -894,7 +894,7 @@ local function process_sa_conf(f)
cur_rule['function'] = func
valid_rule = true
else
- rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
+ rspamd_logger.infox(rspamd_config, 'unknown function %s', args)
end
end
elseif words[1] == "body" then
@@ -931,7 +931,7 @@ local function process_sa_conf(f)
cur_rule['function'] = func
valid_rule = true
else
- rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
+ rspamd_logger.infox(rspamd_config, 'unknown function %s', args)
end
end
elseif words[1] == "rawbody" then
@@ -968,7 +968,7 @@ local function process_sa_conf(f)
cur_rule['function'] = func
valid_rule = true
else
- rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
+ rspamd_logger.infox(rspamd_config, 'unknown function %s', args)
end
end
elseif words[1] == "full" then
@@ -1006,7 +1006,7 @@ local function process_sa_conf(f)
cur_rule['function'] = func
valid_rule = true
else
- rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
+ rspamd_logger.infox(rspamd_config, 'unknown function %s', args)
end
end
elseif words[1] == "uri" then
@@ -1265,11 +1265,11 @@ local function post_process()
if res then
local nre = rspamd_regexp.create(nexpr)
if not nre then
- rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %1', r)
+ rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %s', r)
--rule['re'] = nil
else
local old_max_hits = rule['re']:get_max_hits()
- lua_util.debugm(N, rspamd_config, 'replace %1 -> %2', r, nexpr)
+ lua_util.debugm(N, rspamd_config, 'replace %s -> %s', r, nexpr)
rspamd_config:replace_regexp({
old_re = rule['re'],
new_re = nre,
@@ -1306,8 +1306,7 @@ local function post_process()
end
if not r['re'] then
- rspamd_logger.errx(task, 're is missing for rule %1 (%2 header)', k,
- h['header'])
+ rspamd_logger.errx(task, 're is missing for rule %s', h)
return 0
end
@@ -1434,7 +1433,7 @@ local function post_process()
fun.each(function(k, r)
local f = function(task)
if not r['re'] then
- rspamd_logger.errx(task, 're is missing for rule %1', k)
+ rspamd_logger.errx(task, 're is missing for rule %s', k)
return 0
end
@@ -1461,7 +1460,7 @@ local function post_process()
fun.each(function(k, r)
local f = function(task)
if not r['re'] then
- rspamd_logger.errx(task, 're is missing for rule %1', k)
+ rspamd_logger.errx(task, 're is missing for rule %s', k)
return 0
end
@@ -1486,7 +1485,7 @@ local function post_process()
fun.each(function(k, r)
local f = function(task)
if not r['re'] then
- rspamd_logger.errx(task, 're is missing for rule %1', k)
+ rspamd_logger.errx(task, 're is missing for rule %s', k)
return 0
end
@@ -1629,8 +1628,8 @@ local function post_process()
rspamd_config:register_dependency(k, rspamd_symbol)
external_deps[k][rspamd_symbol] = true
lua_util.debugm(N, rspamd_config,
- 'atom %1 is a direct foreign dependency, ' ..
- 'register dependency for %2 on %3',
+ 'atom %s is a direct foreign dependency, ' ..
+ 'register dependency for %s on %s',
a, k, rspamd_symbol)
end
end
@@ -1659,8 +1658,8 @@ local function post_process()
rspamd_config:register_dependency(k, dep)
external_deps[k][dep] = true
lua_util.debugm(N, rspamd_config,
- 'atom %1 is an indirect foreign dependency, ' ..
- 'register dependency for %2 on %3',
+ 'atom %s is an indirect foreign dependency, ' ..
+ 'register dependency for %s on %s',
a, k, dep)
nchanges = nchanges + 1
end
@@ -1694,10 +1693,10 @@ local function post_process()
-- Logging output
if freemail_domains then
freemail_trie = rspamd_trie.create(freemail_domains)
- rspamd_logger.infox(rspamd_config, 'loaded %1 freemail domains definitions',
+ rspamd_logger.infox(rspamd_config, 'loaded %s freemail domains definitions',
#freemail_domains)
end
- rspamd_logger.infox(rspamd_config, 'loaded %1 blacklist/whitelist elements',
+ rspamd_logger.infox(rspamd_config, 'loaded %s blacklist/whitelist elements',
sa_lists['elts'])
end
@@ -1739,7 +1738,7 @@ if type(section) == "table" then
process_sa_conf(f)
has_rules = true
else
- rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
+ rspamd_logger.errx(rspamd_config, "cannot open %s", matched)
end
end
end
@@ -1758,7 +1757,7 @@ if type(section) == "table" then
process_sa_conf(f)
has_rules = true
else
- rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
+ rspamd_logger.errx(rspamd_config, "cannot open %s", matched)
end
end
end
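Most of the spamassassin.lua hunks above, like the trie.lua ones below, only switch logger format strings from positional %1/%2 placeholders to sequential %s. rspamd_logger understands both forms, so the change is stylistic; a one-line sketch of the equivalence:

    local n = 42
    rspamd_logger.infox(rspamd_config, 'loaded %1 freemail domains definitions', n) -- positional
    rspamd_logger.infox(rspamd_config, 'loaded %s freemail domains definitions', n) -- sequential
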
diff --git a/src/plugins/lua/trie.lua b/src/plugins/lua/trie.lua
index 7ba455289..7c7214b55 100644
--- a/src/plugins/lua/trie.lua
+++ b/src/plugins/lua/trie.lua
@@ -107,10 +107,10 @@ local function process_trie_file(symbol, cf)
local file = io.open(cf['file'])
if not file then
- rspamd_logger.errx(rspamd_config, 'Cannot open trie file %1', cf['file'])
+ rspamd_logger.errx(rspamd_config, 'Cannot open trie file %s', cf['file'])
else
if cf['binary'] then
- rspamd_logger.errx(rspamd_config, 'binary trie patterns are not implemented yet: %1',
+ rspamd_logger.errx(rspamd_config, 'binary trie patterns are not implemented yet: %s',
cf['file'])
else
for line in file:lines() do
@@ -123,7 +123,7 @@ end
local function process_trie_conf(symbol, cf)
if type(cf) ~= 'table' then
- rspamd_logger.errx(rspamd_config, 'invalid value for symbol %1: "%2", expected table',
+ rspamd_logger.errx(rspamd_config, 'invalid value for symbol %s: "%s", expected table',
symbol, cf)
return
end
@@ -145,17 +145,17 @@ if opts then
if #raw_patterns > 0 then
raw_trie = rspamd_trie.create(raw_patterns)
- rspamd_logger.infox(rspamd_config, 'registered raw search trie from %1 patterns', #raw_patterns)
+ rspamd_logger.infox(rspamd_config, 'registered raw search trie from %s patterns', #raw_patterns)
end
if #mime_patterns > 0 then
mime_trie = rspamd_trie.create(mime_patterns)
- rspamd_logger.infox(rspamd_config, 'registered mime search trie from %1 patterns', #mime_patterns)
+ rspamd_logger.infox(rspamd_config, 'registered mime search trie from %s patterns', #mime_patterns)
end
if #body_patterns > 0 then
body_trie = rspamd_trie.create(body_patterns)
- rspamd_logger.infox(rspamd_config, 'registered body search trie from %1 patterns', #body_patterns)
+ rspamd_logger.infox(rspamd_config, 'registered body search trie from %s patterns', #body_patterns)
end
local id = -1