Diffstat (limited to 'lualib')
-rw-r--r--  lualib/lua_bayes_redis.lua                    67
-rw-r--r--  lualib/lua_cfg_transform.lua                  22
-rw-r--r--  lualib/lua_dkim_tools.lua                    112
-rw-r--r--  lualib/lua_magic/patterns.lua                 17
-rw-r--r--  lualib/lua_magic/types.lua                     7
-rw-r--r--  lualib/lua_redis.lua                          28
-rw-r--r--  lualib/lua_scanners/icap.lua                   9
-rw-r--r--  lualib/redis_scripts/bayes_cache_learn.lua    17
-rw-r--r--  lualib/redis_scripts/bayes_classify.lua       75
-rw-r--r--  lualib/redis_scripts/bayes_learn.lua          55
-rw-r--r--  lualib/rspamadm/dmarc_report.lua              18
-rw-r--r--  lualib/rspamadm/mime.lua                     401
12 files changed, 491 insertions, 337 deletions
diff --git a/lualib/lua_bayes_redis.lua b/lualib/lua_bayes_redis.lua
index 782e6fc47..a7af80bf1 100644
--- a/lualib/lua_bayes_redis.lua
+++ b/lualib/lua_bayes_redis.lua
@@ -25,27 +25,44 @@ local ucl = require "ucl"
local N = "bayes"
local function gen_classify_functor(redis_params, classify_script_id)
- return function(task, expanded_key, id, is_spam, stat_tokens, callback)
-
+ return function(task, expanded_key, id, class_labels, stat_tokens, callback)
local function classify_redis_cb(err, data)
lua_util.debugm(N, task, 'classify redis cb: %s, %s', err, data)
if err then
callback(task, false, err)
else
- callback(task, true, data[1], data[2], data[3], data[4])
+ -- Pass the raw data table to the C++ callback for processing
+ -- The C++ callback will handle both binary and multi-class formats
+ callback(task, true, data)
+ end
+ end
+
+ -- Determine class labels to send to Redis script
+ local script_class_labels
+ if type(class_labels) == "table" then
+ -- Use simple comma-separated string instead of messagepack
+ script_class_labels = "TABLE:" .. table.concat(class_labels, ",")
+ else
+ -- Single class label or boolean compatibility
+ if class_labels == true or class_labels == "true" then
+ script_class_labels = "S" -- spam
+ elseif class_labels == false or class_labels == "false" then
+ script_class_labels = "H" -- ham
+ else
+ script_class_labels = class_labels -- string class label
end
end
lua_redis.exec_redis_script(classify_script_id,
{ task = task, is_write = false, key = expanded_key },
- classify_redis_cb, { expanded_key, stat_tokens })
+ classify_redis_cb, { expanded_key, script_class_labels, stat_tokens })
end
end
local function gen_learn_functor(redis_params, learn_script_id)
- return function(task, expanded_key, id, is_spam, symbol, is_unlearn, stat_tokens, callback, maybe_text_tokens)
+ return function(task, expanded_key, id, class_label, symbol, is_unlearn, stat_tokens, callback, maybe_text_tokens)
local function learn_redis_cb(err, data)
- lua_util.debugm(N, task, 'learn redis cb: %s, %s', err, data)
+ lua_util.debugm(N, task, 'learn redis cb: %s, %s for class %s', err, data, class_label)
if err then
callback(task, false, err)
else
@@ -53,17 +70,24 @@ local function gen_learn_functor(redis_params, learn_script_id)
end
end
+ -- Convert class_label for backward compatibility
+ local script_class_label = class_label
+ if class_label == true or class_label == "true" then
+ script_class_label = "S" -- spam
+ elseif class_label == false or class_label == "false" then
+ script_class_label = "H" -- ham
+ end
+
if maybe_text_tokens then
lua_redis.exec_redis_script(learn_script_id,
{ task = task, is_write = true, key = expanded_key },
learn_redis_cb,
- { expanded_key, tostring(is_spam), symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens })
+ { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens })
else
lua_redis.exec_redis_script(learn_script_id,
{ task = task, is_write = true, key = expanded_key },
- learn_redis_cb, { expanded_key, tostring(is_spam), symbol, tostring(is_unlearn), stat_tokens })
+ learn_redis_cb, { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens })
end
-
end
end
@@ -112,8 +136,7 @@ end
--- @param classifier_ucl ucl of the classifier config
--- @param statfile_ucl ucl of the statfile config
--- @return a pair of (classify_functor, learn_functor) or `nil` in case of error
-exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol, is_spam, ev_base, stat_periodic_cb)
-
+exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol, class_label, ev_base, stat_periodic_cb)
local redis_params = load_redis_params(classifier_ucl, statfile_ucl)
if not redis_params then
@@ -137,7 +160,6 @@ exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol,
if ev_base then
rspamd_config:add_periodic(ev_base, 0.0, function(cfg, _)
-
local function stat_redis_cb(err, data)
lua_util.debugm(N, cfg, 'stat redis cb: %s, %s', err, data)
@@ -162,11 +184,22 @@ exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol,
end
end
+ -- Convert class_label to learn key
+ local learn_key
+ if class_label == true or class_label == "true" or class_label == "S" then
+ learn_key = "learns_spam"
+ elseif class_label == false or class_label == "false" or class_label == "H" then
+ learn_key = "learns_ham"
+ else
+ -- For other class labels, use learns_<class_label>
+ learn_key = "learns_" .. string.lower(tostring(class_label))
+ end
+
lua_redis.exec_redis_script(stat_script_id,
{ ev_base = ev_base, cfg = cfg, is_write = false },
stat_redis_cb, { tostring(cursor),
symbol,
- is_spam and "learns_spam" or "learns_ham",
+ learn_key,
tostring(max_users) })
return statfile_ucl.monitor_timeout or classifier_ucl.monitor_timeout or 30.0
end)
@@ -178,7 +211,6 @@ end
local function gen_cache_check_functor(redis_params, check_script_id, conf)
local packed_conf = ucl.to_format(conf, 'msgpack')
return function(task, cache_id, callback)
-
local function classify_redis_cb(err, data)
lua_util.debugm(N, task, 'check cache redis cb: %s, %s (%s)', err, data, type(data))
if err then
@@ -201,17 +233,16 @@ end
local function gen_cache_learn_functor(redis_params, learn_script_id, conf)
local packed_conf = ucl.to_format(conf, 'msgpack')
- return function(task, cache_id, is_spam)
+ return function(task, cache_id, class_name, class_id)
local function learn_redis_cb(err, data)
lua_util.debugm(N, task, 'learn_cache redis cb: %s, %s', err, data)
end
- lua_util.debugm(N, task, 'try to learn cache: %s', cache_id)
+ lua_util.debugm(N, task, 'try to learn cache: %s as %s (id=%s)', cache_id, class_name, class_id)
lua_redis.exec_redis_script(learn_script_id,
{ task = task, is_write = true, key = cache_id },
learn_redis_cb,
- { cache_id, is_spam and "1" or "0", packed_conf })
-
+ { cache_id, tostring(class_id), packed_conf })
end
end
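
The classify and learn functors above normalise whatever the caller passes (a table of labels, a legacy boolean, or a single string) into one flat string argument for the Redis scripts. A minimal sketch of that encoding in plain Lua, with a hypothetical helper name:

local function encode_class_labels(class_labels)
  if type(class_labels) == "table" then
    -- multi-class: simple comma-separated list with a marker prefix
    return "TABLE:" .. table.concat(class_labels, ",")
  elseif class_labels == true or class_labels == "true" then
    return "S" -- legacy spam flag
  elseif class_labels == false or class_labels == "false" then
    return "H" -- legacy ham flag
  else
    return class_labels -- already a single string label
  end
end

assert(encode_class_labels({ "H", "S", "T" }) == "TABLE:H,S,T")
assert(encode_class_labels(true) == "S")
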
diff --git a/lualib/lua_cfg_transform.lua b/lualib/lua_cfg_transform.lua
index 265ca34c0..ec11ef299 100644
--- a/lualib/lua_cfg_transform.lua
+++ b/lualib/lua_cfg_transform.lua
@@ -198,20 +198,22 @@ end
local function symbol_transform(cfg, k, v)
local groups = cfg:at('group')
- -- first try to find any group where there is a definition of this symbol
- for gr_n, gr in groups:pairs() do
- local symbols = gr:at('symbols')
- if symbols and symbols:at(k) then
- -- We override group symbol with ungrouped symbol
- logger.infox("overriding group symbol %s in the group %s", k, gr_n)
- symbols[k] = lua_util.override_defaults(symbols:at(k):unwrap(), v:unwrap())
- return
+ if groups then
+ -- first try to find any group where there is a definition of this symbol
+ for gr_n, gr in groups:pairs() do
+ local symbols = gr:at('symbols')
+ if symbols and symbols:at(k) then
+ -- We override group symbol with ungrouped symbol
+ logger.infox("overriding group symbol %s in the group %s", k, gr_n)
+ symbols[k] = lua_util.override_defaults(symbols:at(k):unwrap(), v:unwrap())
+ return
+ end
end
end
-- Now check what Rspamd knows about this symbol
local sym = rspamd_config:get_symbol(k)
- if not sym or not sym.group then
+ if groups and (not sym or not sym.group) then
-- Otherwise we just use group 'ungrouped'
if not groups:at('ungrouped') then
groups.ungrouped = {
@@ -374,7 +376,7 @@ return function(cfg)
local next_act = actions_order[j]
if actions:at(next_act) and actions:at(next_act):type() == 'number' then
local next_score = actions:at(next_act):unwrap()
- if next_score <= score then
+ if type(score) == 'number' and type(next_score) == 'number' and next_score <= score then
logger.errx(rspamd_config, 'invalid actions thresholds order: action %s (%s) must have lower ' ..
'score than action %s (%s)', act, score, next_act, next_score)
ret = false
diff --git a/lualib/lua_dkim_tools.lua b/lualib/lua_dkim_tools.lua
index b7f520fae..69c9462b5 100644
--- a/lualib/lua_dkim_tools.lua
+++ b/lualib/lua_dkim_tools.lua
@@ -13,7 +13,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-]]--
+]] --
local exports = {}
@@ -33,7 +33,7 @@ local function check_violation(N, task, domain)
if task:has_symbol(sym_check) then
local sym = task:get_symbol(sym_check)[1]
logger.infox(task, 'skip signing for %s: violation %s found: %s',
- domain, sym_check, sym.options)
+ domain, sym_check, sym.options)
return false
end
@@ -92,7 +92,6 @@ local function parse_dkim_http_headers(N, task, settings)
local key = task:get_request_header(headers.key_header)
if not (domain and selector and key) then
-
logger.errx(task, 'missing required headers to sign email')
return false, {}
end
@@ -258,14 +257,14 @@ local function prepare_dkim_signing(N, task, settings)
-- OpenDKIM style
if is_skip_sign() then
lua_util.debugm(N, task,
- 'skip signing: is_sign_network: %s, is_authed: %s, is_local: %s',
- is_sign_networks, is_authed, is_local)
+ 'skip signing: is_sign_network: %s, is_authed: %s, is_local: %s',
+ is_sign_networks, is_authed, is_local)
return false, {}
end
if not hfrom or not hfrom[1] or not hfrom[1].addr then
lua_util.debugm(N, task,
- 'signing_table: cannot get data when no header from is presented')
+ 'signing_table: cannot get data when no header from is presented')
return false, {}
end
local sign_entry = settings.signing_table:get_key(hfrom[1].addr:lower())
@@ -273,7 +272,7 @@ local function prepare_dkim_signing(N, task, settings)
if sign_entry then
-- Check opendkim style entries
lua_util.debugm(N, task,
- 'signing_table: found entry for %s: %s', hfrom[1].addr, sign_entry)
+ 'signing_table: found entry for %s: %s', hfrom[1].addr, sign_entry)
if sign_entry == '%' then
sign_entry = hdom
end
@@ -291,7 +290,7 @@ local function prepare_dkim_signing(N, task, settings)
if not selector then
logger.errx(task, 'no selector defined for sign_entry %s, key_entry %s',
- sign_entry, key_entry)
+ sign_entry, key_entry)
return false, {}
end
@@ -305,11 +304,11 @@ local function prepare_dkim_signing(N, task, settings)
if st:sub(1, 1) == '/' or st == './' or st == '..' then
res.key = parts[2]:gsub('%%', hdom)
lua_util.debugm(N, task, 'perform dkim signing for %s, selector=%s, domain=%s, key file=%s',
- hdom, selector, res.domain, res.key)
+ hdom, selector, res.domain, res.key)
else
res.rawkey = parts[2] -- No sanity check here
lua_util.debugm(N, task, 'perform dkim signing for %s, selector=%s, domain=%s, raw key used',
- hdom, selector, res.domain)
+ hdom, selector, res.domain)
end
return true, { res }
@@ -327,56 +326,56 @@ local function prepare_dkim_signing(N, task, settings)
if st:sub(1, 1) == '/' or st == './' or st == '..' then
res.key = parts[3]:gsub('%%', hdom)
lua_util.debugm(N, task, 'perform dkim signing for %s, selector=%s, domain=%s, key file=%s',
- hdom, selector, res.domain, res.key)
+ hdom, selector, res.domain, res.key)
else
res.rawkey = parts[3] -- No sanity check here
lua_util.debugm(N, task, 'perform dkim signing for %s, selector=%s, domain=%s, raw key used',
- hdom, selector, res.domain)
+ hdom, selector, res.domain)
end
return true, { res }
else
logger.errx(task, 'invalid key entry for sign entry %s: %s; when signing %s domain',
- sign_entry, key_entry, hdom)
+ sign_entry, key_entry, hdom)
return false, {}
end
elseif settings.use_vault then
-- Sign table is presented, the rest is covered by vault
lua_util.debugm(N, task, 'check vault for %s, by sign entry %s, key entry is missing',
- hdom, sign_entry)
+ hdom, sign_entry)
return true, {
domain = sign_entry,
vault = true
}
else
logger.errx(task, 'missing key entry for sign entry %s; when signing %s domain',
- sign_entry, hdom)
+ sign_entry, hdom)
return false, {}
end
else
logger.errx(task, 'cannot get key entry for signing entry %s, when signing %s domain',
- sign_entry, hdom)
+ sign_entry, hdom)
return false, {}
end
else
lua_util.debugm(N, task,
- 'signing_table: no entry for %s', hfrom[1].addr)
+ 'signing_table: no entry for %s', hfrom[1].addr)
return false, {}
end
else
if settings.use_domain_sign_networks and is_sign_networks then
dkim_domain = get_dkim_domain('use_domain_sign_networks')
lua_util.debugm(N, task,
- 'sign_networks: use domain(%s) for signature: %s',
- settings.use_domain_sign_networks, dkim_domain)
+ 'sign_networks: use domain(%s) for signature: %s',
+ settings.use_domain_sign_networks, dkim_domain)
elseif settings.use_domain_sign_local and is_local then
dkim_domain = get_dkim_domain('use_domain_sign_local')
lua_util.debugm(N, task, 'local: use domain(%s) for signature: %s',
- settings.use_domain_sign_local, dkim_domain)
+ settings.use_domain_sign_local, dkim_domain)
elseif settings.use_domain_sign_inbound and not is_local and not auser then
dkim_domain = get_dkim_domain('use_domain_sign_inbound')
lua_util.debugm(N, task, 'inbound: use domain(%s) for signature: %s',
- settings.use_domain_sign_inbound, dkim_domain)
+ settings.use_domain_sign_inbound, dkim_domain)
elseif settings.use_domain_custom then
if type(settings.use_domain_custom) == 'string' then
-- Load custom function
@@ -387,10 +386,10 @@ local function prepare_dkim_signing(N, task, settings)
settings.use_domain_custom = res_or_err
dkim_domain = settings.use_domain_custom(task)
lua_util.debugm(N, task, 'use custom domain for signing: %s',
- dkim_domain)
+ dkim_domain)
else
logger.errx(task, 'cannot load dkim domain custom script: invalid type: %s, expected function',
- type(res_or_err))
+ type(res_or_err))
settings.use_domain_custom = nil
end
else
@@ -400,12 +399,12 @@ local function prepare_dkim_signing(N, task, settings)
else
dkim_domain = settings.use_domain_custom(task)
lua_util.debugm(N, task, 'use custom domain for signing: %s',
- dkim_domain)
+ dkim_domain)
end
else
dkim_domain = get_dkim_domain('use_domain')
lua_util.debugm(N, task, 'use domain(%s) for signature: %s',
- settings.use_domain, dkim_domain)
+ settings.use_domain, dkim_domain)
end
end
@@ -467,7 +466,7 @@ local function prepare_dkim_signing(N, task, settings)
})
else
lua_util.debugm(N, task, 'domain %s is not designated for vault',
- dkim_domain)
+ dkim_domain)
end
else
-- TODO: try every domain in the vault
@@ -501,7 +500,7 @@ local function prepare_dkim_signing(N, task, settings)
if ret then
table.insert(p, k)
lua_util.debugm(N, task, 'using mempool selector %s with key %s',
- k.selector, k.key)
+ k.selector, k.key)
end
end
@@ -530,11 +529,11 @@ local function prepare_dkim_signing(N, task, settings)
if not settings.use_redis then
insert_or_update_prop(N, task, p, 'key',
- 'default path', settings.path)
+ 'default path', settings.path)
end
insert_or_update_prop(N, task, p, 'selector',
- 'default selector', settings.selector)
+ 'default selector', settings.selector)
if settings.check_violation then
if not check_violation(N, task, p.domain) then
@@ -543,7 +542,7 @@ local function prepare_dkim_signing(N, task, settings)
end
insert_or_update_prop(N, task, p, 'domain', 'dkim_domain',
- dkim_domain)
+ dkim_domain)
return #p > 0 and true or false, p
end
@@ -560,53 +559,53 @@ exports.sign_using_redis = function(N, task, settings, selectors, sign_func, err
local function redis_key_cb(err, data)
if err then
err_func(string.format("cannot make request to load DKIM key for %s: %s",
- rk, err))
+ rk, err))
elseif type(data) ~= 'string' then
lua_util.debugm(N, task, "missing DKIM key for %s", rk)
else
p.rawkey = data
lua_util.debugm(N, task, 'found and parsed key for %s:%s in Redis',
- p.domain, p.selector)
+ p.domain, p.selector)
sign_func(task, p)
end
end
local rret = lua_redis.redis_make_request(task,
- settings.redis_params, -- connect params
- rk, -- hash key
- false, -- is write
- redis_key_cb, --callback
- 'HGET', -- command
- { settings.key_prefix, rk } -- arguments
+ settings.redis_params, -- connect params
+ rk, -- hash key
+ false, -- is write
+ redis_key_cb, --callback
+ 'HGET', -- command
+ { settings.key_prefix, rk } -- arguments
)
if not rret then
err_func(task,
- string.format("cannot make request to load DKIM key for %s", rk))
+ string.format("cannot make request to load DKIM key for %s", rk))
end
end
for _, p in ipairs(selectors) do
if settings.selector_prefix then
logger.infox(task, "using selector prefix '%s' for domain '%s'",
- settings.selector_prefix, p.domain);
+ settings.selector_prefix, p.domain);
local function redis_selector_cb(err, data)
if err or type(data) ~= 'string' then
err_func(task, string.format("cannot make request to load DKIM selector for domain %s: %s",
- p.domain, err))
+ p.domain, err))
else
try_redis_key(data, p)
end
end
local rret = lua_redis.redis_make_request(task,
- settings.redis_params, -- connect params
- p.domain, -- hash key
- false, -- is write
- redis_selector_cb, --callback
- 'HGET', -- command
- { settings.selector_prefix, p.domain } -- arguments
+ settings.redis_params, -- connect params
+ p.domain, -- hash key
+ false, -- is write
+ redis_selector_cb, --callback
+ 'HGET', -- command
+ { settings.selector_prefix, p.domain } -- arguments
)
if not rret then
err_func(task, string.format("cannot make Redis request to load DKIM selector for domain %s",
- p.domain))
+ p.domain))
end
else
try_redis_key(p.selector, p)
@@ -619,25 +618,25 @@ exports.sign_using_vault = function(N, task, settings, selector, sign_func, err_
local ucl = require "ucl"
local full_url = string.format('%s/v1/%s/%s',
- settings.vault_url, settings.vault_path or 'dkim', selector.domain)
+ settings.vault_url, settings.vault_path or 'dkim', selector.domain)
local upstream_list = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), settings.vault_url)
local function vault_callback(err, code, body, _)
if code ~= 200 then
err_func(task, string.format('cannot request data from the vault url: %s; %s (%s)',
- full_url, err, body))
+ full_url, err, body))
else
local parser = ucl.parser()
local res, parser_err = parser:parse_string(body)
if not res then
err_func(task, string.format('vault reply for %s (data=%s) cannot be parsed: %s',
- full_url, body, parser_err))
+ full_url, body, parser_err))
else
local obj = parser:get_object()
if not obj or not obj.data then
err_func(task, string.format('vault reply for %s (data=%s) is invalid, no data',
- full_url, body))
+ full_url, body))
else
local elts = obj.data.selectors or {}
local errs = {}
@@ -675,13 +674,13 @@ exports.sign_using_vault = function(N, task, settings, selector, sign_func, err_
alg = p.alg,
}
lua_util.debugm(N, task, 'found and parsed key for %s:%s in Vault',
- dkim_sign_data.domain, dkim_sign_data.selector)
+ dkim_sign_data.domain, dkim_sign_data.selector)
nvalid = nvalid + 1
sign_func(task, dkim_sign_data)
end, fun.filter(is_selector_valid, elts))
for _, e in errs do
lua_util.debugm(N, task, 'error found during processing Vault selectors: %s:%s',
- e[1], e[2])
+ e[1], e[2])
end
if nvalid == 0 then
@@ -707,7 +706,7 @@ exports.sign_using_vault = function(N, task, settings, selector, sign_func, err_
if not ret then
err_func(task, string.format("cannot make HTTP request to load DKIM data domain %s",
- selector.domain))
+ selector.domain))
end
end
@@ -732,8 +731,7 @@ exports.process_signing_settings = function(N, settings, opts)
selector_map = { 'map', 'DKIM selectors' },
signing_table = { 'glob', 'DKIM signing table' },
key_table = { 'glob', 'DKIM keys table' },
- vault_domains = { 'glob', 'DKIM signing domains in vault' },
- whitelisted_signers_map = { 'set', 'ARC trusted signers domains' }
+ vault_domains = { 'glob', 'DKIM signing domains in vault' }
}
for k, v in pairs(opts) do
local maybe_map = maps_opts[k]
diff --git a/lualib/lua_magic/patterns.lua b/lualib/lua_magic/patterns.lua
index 971ddd95f..4a5abd8ce 100644
--- a/lualib/lua_magic/patterns.lua
+++ b/lualib/lua_magic/patterns.lua
@@ -466,6 +466,23 @@ local patterns = {
},
}
},
+ heic = {
+ matches = {
+ {
+ -- HEIC/HEIF file format signature
+ -- Starts with ftyp followed by specific brand identifiers
+ string = "^....ftyphe[im][cs]",
+ position = 12,
+ weight = 60,
+ },
+ {
+ -- Alternative signature for HEIC/HEIF
+ string = [[^....ftypmif1]],
+ position = 12,
+ weight = 60,
+ },
+ }
+ },
}
return patterns
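
The new heic entry matches the ISO-BMFF 'ftyp' box: four size bytes, the literal "ftyp", then a brand identifier such as heic or mif1. A quick illustration with plain Lua patterns (the magic engine compiles these with its own regexp library, so this only approximates what the two signatures accept):

-- typical beginning of a .heic file: a 24-byte 'ftyp' box with major brand 'heic'
local header = "\0\0\0\24ftypheic\0\0\0\0mif1heic"
print(header:find("^....ftyphe[im][cs]") ~= nil) -- true
print(header:find("^....ftypmif1") ~= nil)       -- false: the major brand here is 'heic', not 'mif1'
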
diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index 3dce2e1f8..ad4ae4349 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -279,6 +279,11 @@ local types = {
ct = 'image/bmp',
av_check = false,
},
+ heic = {
+ type = 'image',
+ ct = 'image/heic',
+ av_check = false,
+ },
dwg = {
type = 'image',
ct = 'image/vnd.dwg',
@@ -324,4 +329,4 @@ local types = {
},
}
-return types
\ No newline at end of file
+return types
diff --git a/lualib/lua_redis.lua b/lualib/lua_redis.lua
index a21b97f89..195b7759f 100644
--- a/lualib/lua_redis.lua
+++ b/lualib/lua_redis.lua
@@ -1129,9 +1129,9 @@ local function redis_make_request_taskless(ev_base, cfg, redis_params, key,
end
--[[[
--- @function lua_redis.redis_make_request_taskless(ev_base, redis_params, key, is_write, callback, command, args)
+-- @function lua_redis.redis_make_request_taskless(ev_base, cfg, redis_params, key, is_write, callback, command, args)
-- Sends a request to Redis in context where `task` is not available for some specific use-cases
--- Identical to redis_make_request() except in that first parameter is an `event base` object
+-- Identical to redis_make_request() except in that first parameter is an `event base` object and the second one is the 'config' object
--]]
exports.rspamd_redis_make_request_taskless = redis_make_request_taskless
@@ -1207,15 +1207,13 @@ local function prepare_redis_call(script)
return options
end
-local function is_all_servers_ready(script)
+local function is_any_server_ready(script)
for _, s in ipairs(script.servers_ready) do
- if s == "unsent" or s == "tempfail" then
- return false
+ if s == "done" then
+ return true
end
end
-
- -- We assume that permanent errors are not recoverable, so we will just skip those servers
- return true
+ return false
end
local function is_all_servers_failed(script)
@@ -1269,7 +1267,7 @@ local function load_script_task(script, task, is_write)
script.sha = data -- We assume that sha is the same on all servers
script.servers_ready[idx] = "done"
end
- if is_all_servers_ready(script) then
+ if is_any_server_ready(script) then
script_set_loaded(script)
elseif is_all_servers_failed(script) then
script.pending_upload = false
@@ -1287,7 +1285,7 @@ local function load_script_task(script, task, is_write)
end
end
- if is_all_servers_ready(script) then
+ if is_any_server_ready(script) then
script_set_loaded(script)
elseif is_all_servers_failed(script) then
script.pending_upload = false
@@ -1314,7 +1312,6 @@ local function load_script_taskless(script, cfg, ev_base, is_write)
err, script.caller.short_src, script.caller.currentline)
opt.upstream:fail()
script.servers_ready[idx] = "failed"
- return
else
-- Assume temporary error
logger.infox(cfg, 'temporary error uploading script %s to %s: %s; registered from: %s:%s',
@@ -1322,7 +1319,6 @@ local function load_script_taskless(script, cfg, ev_base, is_write)
opt.upstream:get_addr():to_string(true),
err, script.caller.short_src, script.caller.currentline)
script.servers_ready[idx] = "tempfail"
- return
end
else
opt.upstream:ok()
@@ -1335,7 +1331,7 @@ local function load_script_taskless(script, cfg, ev_base, is_write)
script.servers_ready[idx] = "done"
end
- if is_all_servers_ready(script) then
+ if is_any_server_ready(script) then
script_set_loaded(script)
elseif is_all_servers_failed(script) then
script.pending_upload = false
@@ -1353,7 +1349,7 @@ local function load_script_taskless(script, cfg, ev_base, is_write)
end
end
- if is_all_servers_ready(script) then
+ if is_any_server_ready(script) then
script_set_loaded(script)
elseif is_all_servers_failed(script) then
script.pending_upload = false
@@ -1482,6 +1478,10 @@ local function exec_redis_script(id, params, callback, keys, args)
script.sha = nil
script.loaded = nil
script.pending_upload = true
+ -- We must initialize all servers as we don't know here which one failed
+ for i, _ in ipairs(script.servers_ready) do
+ script.servers_ready[i] = "unsent"
+ end
-- Reload scripts if this has not been initiated yet
if params.task then
load_script_task(script, params.task)
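
The hunks above relax the script-readiness rule: execution may proceed once any upstream holds the script, and a NOSCRIPT error now resets every per-server slot before re-uploading, since the failing upstream cannot be identified at that point. A small sketch of the state bookkeeping, with illustrative values:

-- per-server states used by the loader: "unsent", "tempfail", "failed", "done"
local script = { servers_ready = { "done", "tempfail", "unsent" } }
-- usable already: at least one upstream has accepted the script

-- on NOSCRIPT during execution the failing server is unknown, so all slots
-- are pushed back to "unsent" and the upload is retried everywhere
for i in ipairs(script.servers_ready) do
  script.servers_ready[i] = "unsent"
end
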
diff --git a/lualib/lua_scanners/icap.lua b/lualib/lua_scanners/icap.lua
index 2e3ced034..532858793 100644
--- a/lualib/lua_scanners/icap.lua
+++ b/lualib/lua_scanners/icap.lua
@@ -239,13 +239,16 @@ local function icap_check(task, content, digest, rule, maybe_part)
end
end
- local function get_req_headers()
-
+ local function get_req_headers()
local in_client_ip = task:get_from_ip()
+ local in_client_ip_str = in_client_ip:to_string()
local req_hlen = 2
+ if in_client_ip:get_version() == 6 then
+ in_client_ip_str = "ip6-" .. string.gsub(in_client_ip_str, ":", "-")
+ end
if maybe_part then
table.insert(req_headers,
- string.format('GET http://%s/%s HTTP/1.0\r\n', in_client_ip, lua_util.url_encode_string(maybe_part:get_filename())))
+ string.format('GET http://%s/%s HTTP/1.0\r\n', in_client_ip_str, lua_util.url_encode_string(maybe_part:get_filename())))
if rule.use_specific_content_type then
table.insert(http_headers, string.format('Content-Type: %s/%s\r\n', maybe_part:get_detected_type()))
--else
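
The ICAP change above rewrites IPv6 client addresses before embedding them in the pseudo request line, since a bare IPv6 literal contains colons. What the transformation produces, assuming an example address and filename:

local in_client_ip_str = "2001:db8::1"
in_client_ip_str = "ip6-" .. string.gsub(in_client_ip_str, ":", "-")
print(in_client_ip_str)
-- ip6-2001-db8--1
print(string.format('GET http://%s/%s HTTP/1.0\r\n', in_client_ip_str, "invoice.pdf"))
-- GET http://ip6-2001-db8--1/invoice.pdf HTTP/1.0
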
diff --git a/lualib/redis_scripts/bayes_cache_learn.lua b/lualib/redis_scripts/bayes_cache_learn.lua
index d8a2d878e..a7c9ac443 100644
--- a/lualib/redis_scripts/bayes_cache_learn.lua
+++ b/lualib/redis_scripts/bayes_cache_learn.lua
@@ -1,12 +1,15 @@
--- Lua script to perform cache checking for bayes classification
+-- Lua script to perform cache checking for bayes classification (multi-class)
-- This script accepts the following parameters:
-- key1 - cache id
--- key3 - is spam (1 or 0)
+-- key2 - class_id (numeric hash of class name, computed by C side)
-- key3 - configuration table in message pack
local cache_id = KEYS[1]
-local is_spam = KEYS[2]
+local class_id = KEYS[2]
local conf = cmsgpack.unpack(KEYS[3])
+
+-- Use class_id directly as cache value
+local cache_value = tostring(class_id)
cache_id = string.sub(cache_id, 1, conf.cache_elt_len)
-- Try each prefix that is in Redis (as some other instance might have set it)
@@ -15,8 +18,8 @@ for i = 0, conf.cache_max_keys do
local have = redis.call('HGET', prefix, cache_id)
if have then
- -- Already in cache, but is_spam changes when relearning
- redis.call('HSET', prefix, cache_id, is_spam)
+ -- Already in cache, but cache_value changes when relearning
+ redis.call('HSET', prefix, cache_id, cache_value)
return false
end
end
@@ -30,7 +33,7 @@ for i = 0, conf.cache_max_keys do
if count < lim then
-- We can add it to this prefix
- redis.call('HSET', prefix, cache_id, is_spam)
+ redis.call('HSET', prefix, cache_id, cache_value)
added = true
end
end
@@ -46,7 +49,7 @@ if not added then
if exists then
if not expired then
redis.call('DEL', prefix)
- redis.call('HSET', prefix, cache_id, is_spam)
+ redis.call('HSET', prefix, cache_id, cache_value)
-- Do not expire anything else
expired = true
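
For reference, the cache-learn script above is invoked with three KEYS: the (truncated) cache id, the stringified numeric class id computed on the C side, and the msgpack-packed cache configuration. A sketch of that argument layout with made-up values, packing the config the same way gen_cache_learn_functor does:

local ucl = require "ucl" -- rspamd's ucl binding, as in lua_bayes_redis.lua
local packed_conf = ucl.to_format({ cache_elt_len = 20, cache_max_keys = 5 }, 'msgpack')
local keys = {
  "6bdf2a9e5b0c1d43aa10", -- cache id; the script truncates it to conf.cache_elt_len
  "3",                    -- class_id, a numeric hash of the class name
  packed_conf,            -- configuration table in message pack
}
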
diff --git a/lualib/redis_scripts/bayes_classify.lua b/lualib/redis_scripts/bayes_classify.lua
index e94f645fd..d6132e631 100644
--- a/lualib/redis_scripts/bayes_classify.lua
+++ b/lualib/redis_scripts/bayes_classify.lua
@@ -1,37 +1,68 @@
--- Lua script to perform bayes classification
+-- Lua script to perform bayes classification (multi-class)
-- This script accepts the following parameters:
-- key1 - prefix for bayes tokens (e.g. for per-user classification)
--- key2 - set of tokens encoded in messagepack array of strings
+-- key2 - class labels: table of all class labels as "TABLE:label1,label2,..."
+-- key3 - set of tokens encoded in messagepack array of strings
local prefix = KEYS[1]
-local output_spam = {}
-local output_ham = {}
+local class_labels_arg = KEYS[2]
+local input_tokens = cmsgpack.unpack(KEYS[3])
-local learned_ham = tonumber(redis.call('HGET', prefix, 'learns_ham')) or 0
-local learned_spam = tonumber(redis.call('HGET', prefix, 'learns_spam')) or 0
+-- Parse class labels (always expect TABLE: format)
+local class_labels = {}
+if string.match(class_labels_arg, "^TABLE:") then
+ local labels_str = string.sub(class_labels_arg, 7) -- Remove "TABLE:" prefix
+ for label in string.gmatch(labels_str, "([^,]+)") do
+ table.insert(class_labels, label)
+ end
+else
+ -- Legacy single class - convert to array
+ class_labels = { class_labels_arg }
+end
--- Output is a set of pairs (token_index, token_count), tokens that are not
--- found are not filled.
--- This optimisation will save a lot of space for sparse tokens, and in Bayes that assumption is normally held
+-- Get learned counts for all classes (ordered)
+local learned_counts = {}
+for _, label in ipairs(class_labels) do
+ local key = 'learns_' .. string.lower(label)
+ -- Handle legacy keys for backward compatibility
+ if label == 'H' then
+ key = 'learns_ham'
+ elseif label == 'S' then
+ key = 'learns_spam'
+ end
+ table.insert(learned_counts, tonumber(redis.call('HGET', prefix, key)) or 0)
+end
-if learned_ham > 0 and learned_spam > 0 then
- local input_tokens = cmsgpack.unpack(KEYS[2])
- for i, token in ipairs(input_tokens) do
- local token_data = redis.call('HMGET', token, 'H', 'S')
+-- Get token data for all classes (ordered)
+local token_results = {}
+for i, _ in ipairs(class_labels) do
+ token_results[i] = {}
+end
- if token_data then
- local ham_count = token_data[1]
- local spam_count = token_data[2]
+-- Check if we have any learning data
+local has_learns = false
+for _, count in ipairs(learned_counts) do
+ if count > 0 then
+ has_learns = true
+ break
+ end
+end
- if ham_count then
- table.insert(output_ham, { i, tonumber(ham_count) })
- end
+if has_learns then
+ -- Process each token
+ for i, token in ipairs(input_tokens) do
+ local token_data = redis.call('HMGET', token, unpack(class_labels))
- if spam_count then
- table.insert(output_spam, { i, tonumber(spam_count) })
+ if token_data then
+ for j, _ in ipairs(class_labels) do
+ local count = token_data[j]
+ if count and tonumber(count) > 0 then
+ table.insert(token_results[j], { i, tonumber(count) })
+ end
end
end
end
end
-return { learned_ham, learned_spam, output_ham, output_spam }
\ No newline at end of file
+-- Always return ordered arrays: [learned_counts_array, token_results_array]
+return { learned_counts, token_results }
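
The classify script now returns a single two-element array instead of the old four-value tuple: per-class learned counters in label order, followed by per-class sparse token hits. A sketch of what the Lua side receives for the labels { "H", "S" }, with illustrative numbers:

local result = {
  { 1520, 980 },             -- learns for "H" and "S", in the order the labels were sent
  {
    { { 1, 3 }, { 7, 1 } },  -- class "H": token #1 seen 3 times, token #7 once
    { { 2, 5 } },            -- class "S": token #2 seen 5 times; absent tokens are omitted
  },
}
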
diff --git a/lualib/redis_scripts/bayes_learn.lua b/lualib/redis_scripts/bayes_learn.lua
index 5456165b6..ebc798fe0 100644
--- a/lualib/redis_scripts/bayes_learn.lua
+++ b/lualib/redis_scripts/bayes_learn.lua
@@ -1,14 +1,14 @@
--- Lua script to perform bayes learning
+-- Lua script to perform bayes learning (multi-class)
-- This script accepts the following parameters:
-- key1 - prefix for bayes tokens (e.g. for per-user classification)
--- key2 - boolean is_spam
+-- key2 - class label string (e.g. "S", "H", "T")
-- key3 - string symbol
-- key4 - boolean is_unlearn
-- key5 - set of tokens encoded in messagepack array of strings
-- key6 - set of text tokens (if any) encoded in messagepack array of strings (size must be twice of `KEYS[5]`)
local prefix = KEYS[1]
-local is_spam = KEYS[2] == 'true' and true or false
+local class_label = KEYS[2]
local symbol = KEYS[3]
local is_unlearn = KEYS[4] == 'true' and true or false
local input_tokens = cmsgpack.unpack(KEYS[5])
@@ -18,15 +18,47 @@ if KEYS[6] then
text_tokens = cmsgpack.unpack(KEYS[6])
end
-local hash_key = is_spam and 'S' or 'H'
-local learned_key = is_spam and 'learns_spam' or 'learns_ham'
+-- Handle backward compatibility for boolean values
+if class_label == 'true' then
+ class_label = 'S' -- spam
+elseif class_label == 'false' then
+ class_label = 'H' -- ham
+end
+
+local hash_key = class_label
+local learned_key = 'learns_' .. string.lower(class_label)
+
+-- Handle legacy keys for backward compatibility
+if class_label == 'S' then
+ learned_key = 'learns_spam'
+elseif class_label == 'H' then
+ learned_key = 'learns_ham'
+end
redis.call('SADD', symbol .. '_keys', prefix)
redis.call('HSET', prefix, 'version', '2') -- new schema
-redis.call('HINCRBY', prefix, learned_key, is_unlearn and -1 or 1) -- increase or decrease learned count
+
+-- Update learned count, but prevent it from going negative
+if is_unlearn then
+ local current_count = tonumber(redis.call('HGET', prefix, learned_key)) or 0
+ if current_count > 0 then
+ redis.call('HINCRBY', prefix, learned_key, -1)
+ end
+else
+ redis.call('HINCRBY', prefix, learned_key, 1)
+end
for i, token in ipairs(input_tokens) do
- redis.call('HINCRBY', token, hash_key, is_unlearn and -1 or 1)
+ -- Update token count, but prevent it from going negative
+ if is_unlearn then
+ local current_token_count = tonumber(redis.call('HGET', token, hash_key)) or 0
+ if current_token_count > 0 then
+ redis.call('HINCRBY', token, hash_key, -1)
+ end
+ else
+ redis.call('HINCRBY', token, hash_key, 1)
+ end
+
if text_tokens then
local tok1 = text_tokens[i * 2 - 1]
local tok2 = text_tokens[i * 2]
@@ -38,7 +70,14 @@ for i, token in ipairs(input_tokens) do
redis.call('HSET', token, 'tokens', tok1)
end
- redis.call('ZINCRBY', prefix .. '_z', is_unlearn and -1 or 1, token)
+ if is_unlearn then
+ local current_z_score = tonumber(redis.call('ZSCORE', prefix .. '_z', token)) or 0
+ if current_z_score > 0 then
+ redis.call('ZINCRBY', prefix .. '_z', -1, token)
+ end
+ else
+ redis.call('ZINCRBY', prefix .. '_z', 1, token)
+ end
end
end
end
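
Both the learned counters and the per-token counters above are now clamped at zero when unlearning. Inside a Redis script the read-then-decrement pair is safe because scripts execute atomically; a compact sketch of the pattern, with a hypothetical helper name:

local function decrement_not_below_zero(key, field)
  local current = tonumber(redis.call('HGET', key, field)) or 0
  if current > 0 then
    redis.call('HINCRBY', key, field, -1)
  end
end

-- e.g. decrement_not_below_zero(prefix, learned_key) when is_unlearn is true
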
diff --git a/lualib/rspamadm/dmarc_report.lua b/lualib/rspamadm/dmarc_report.lua
index 71ff5d163..fb28a9264 100644
--- a/lualib/rspamadm/dmarc_report.lua
+++ b/lualib/rspamadm/dmarc_report.lua
@@ -99,6 +99,8 @@ local redis_attrs = {
log_obj = rspamd_config,
resolver = rspamadm_dns_resolver,
}
+local redis_attrs_write = lua_util.shallowcopy(redis_attrs)
+redis_attrs_write['is_write'] = true
local pool
local function load_config(opts)
@@ -481,7 +483,7 @@ local function prepare_report(opts, start_time, end_time, rep_key)
-- Rename report key to avoid races
if not opts.no_opt then
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'RENAME', rep_key, rep_key .. '_processing' })
rep_key = rep_key .. '_processing'
end
@@ -491,7 +493,7 @@ local function prepare_report(opts, start_time, end_time, rep_key)
if not dmarc_record then
if not opts.no_opt then
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'DEL', rep_key })
end
logger.messagex('Cannot process reports for domain %s; invalid dmarc record', reporting_domain)
@@ -554,7 +556,7 @@ local function prepare_report(opts, start_time, end_time, rep_key)
lua_util.debugm(N, 'got final message: %s', message)
if not opts.no_opt then
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'DEL', rep_key })
end
@@ -585,7 +587,7 @@ local function process_report_date(opts, start_time, end_time, date)
-- Rename index key to avoid races
if not opts.no_opt then
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'RENAME', idx_key, idx_key .. '_processing' })
idx_key = idx_key .. '_processing'
end
@@ -595,7 +597,7 @@ local function process_report_date(opts, start_time, end_time, date)
if not ret or not results then
-- Remove bad key
if not opts.no_opt then
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'DEL', idx_key })
end
logger.messagex('Cannot get reports for %s', date)
@@ -615,7 +617,7 @@ local function process_report_date(opts, start_time, end_time, date)
lua_util.shuffle(reports)
-- Remove processed key
if not opts.no_opt then
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'DEL', idx_key })
end
@@ -715,11 +717,11 @@ local function handler(args)
if not opts.no_opt then
lua_util.debugm(N, 'set last report date to %s', start_collection)
-- Hack to avoid coroutines + async functions mess: we use async redis call here
- redis_attrs.callback = function()
+ redis_attrs_write.callback = function()
logger.messagex('Reporting collection has finished %s dates processed, %s reports: %s completed, %s failed',
ndates, nreports, nsuccess, nfail)
end
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'SETEX', 'rspamd_dmarc_last_collection', dmarc_settings.reporting.keys_expire * 2,
tostring(start_collection) })
else
diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua
index e0b23e16c..a20e47e23 100644
--- a/lualib/rspamadm/mime.lua
+++ b/lualib/rspamadm/mime.lua
@@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-]]--
+]] --
local argparse = require "argparse"
local ansicolors = require "ansicolors"
@@ -35,94 +35,94 @@ local parser = argparse()
:require_command(true)
parser:option "-c --config"
- :description "Path to config file"
- :argname("<cfg>")
- :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf")
+ :description "Path to config file"
+ :argname("<cfg>")
+ :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf")
parser:mutex(
- parser:flag "-j --json"
- :description "JSON output",
- parser:flag "-U --ucl"
- :description "UCL output",
- parser:flag "-M --messagepack"
- :description "MessagePack output"
+ parser:flag "-j --json"
+ :description "JSON output",
+ parser:flag "-U --ucl"
+ :description "UCL output",
+ parser:flag "-M --messagepack"
+ :description "MessagePack output"
)
parser:flag "-C --compact"
- :description "Use compact format"
+ :description "Use compact format"
parser:flag "--no-file"
- :description "Do not print filename"
+ :description "Do not print filename"
-- Extract subcommand
local extract = parser:command "extract ex e"
- :description "Extracts data from MIME messages"
+ :description "Extracts data from MIME messages"
extract:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
extract:flag "-t --text"
- :description "Extracts plain text data from a message"
+ :description "Extracts plain text data from a message"
extract:flag "-H --html"
- :description "Extracts htm data from a message"
+ :description "Extracts htm data from a message"
extract:option "-o --output"
- :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')"
- :argname("<type>")
- :convert {
- raw = "raw",
- content = "content",
- oneline = "content_oneline",
- decoded = "raw_parsed",
- decoded_utf = "raw_utf"
-}
- :default "content"
+ :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')"
+ :argname("<type>")
+ :convert {
+ raw = "raw",
+ content = "content",
+ oneline = "content_oneline",
+ decoded = "raw_parsed",
+ decoded_utf = "raw_utf"
+ }
+ :default "content"
extract:flag "-w --words"
- :description "Extracts words"
+ :description "Extracts words"
extract:flag "-p --part"
- :description "Show part info"
+ :description "Show part info"
extract:flag "-s --structure"
- :description "Show structure info (e.g. HTML tags)"
+ :description "Show structure info (e.g. HTML tags)"
extract:flag "-i --invisible"
- :description "Show invisible content for HTML parts"
+ :description "Show invisible content for HTML parts"
extract:option "-F --words-format"
- :description "Words format ('stem', 'norm', 'raw', 'full')"
- :argname("<type>")
- :convert {
- stem = "stem",
- norm = "norm",
- raw = "raw",
- full = "full",
-}
- :default "stem"
+ :description "Words format ('stem', 'norm', 'raw', 'full')"
+ :argname("<type>")
+ :convert {
+ stem = "stem",
+ norm = "norm",
+ raw = "raw",
+ full = "full",
+ }
+ :default "stem"
local stat = parser:command "stat st s"
- :description "Extracts statistical data from MIME messages"
+ :description "Extracts statistical data from MIME messages"
stat:argument "file"
:description "File to process"
:argname "<file>"
:args "+"
stat:mutex(
- stat:flag "-m --meta"
- :description "Lua metatokens",
- stat:flag "-b --bayes"
- :description "Bayes tokens",
- stat:flag "-F --fuzzy"
- :description "Fuzzy hashes"
+ stat:flag "-m --meta"
+ :description "Lua metatokens",
+ stat:flag "-b --bayes"
+ :description "Bayes tokens",
+ stat:flag "-F --fuzzy"
+ :description "Fuzzy hashes"
)
stat:flag "-s --shingles"
:description "Show shingles for fuzzy hashes"
local urls = parser:command "urls url u"
- :description "Extracts URLs from MIME messages"
+ :description "Extracts URLs from MIME messages"
urls:argument "file"
:description "File to process"
:argname "<file>"
:args "+"
urls:mutex(
- urls:flag "-t --tld"
- :description "Get TLDs only",
- urls:flag "-H --host"
- :description "Get hosts only",
- urls:flag "-f --full"
- :description "Show piecewise urls as processed by Rspamd"
+ urls:flag "-t --tld"
+ :description "Get TLDs only",
+ urls:flag "-H --host"
+ :description "Get hosts only",
+ urls:flag "-f --full"
+ :description "Show piecewise urls as processed by Rspamd"
)
urls:flag "-u --unique"
@@ -135,75 +135,75 @@ urls:flag "-r --reverse"
:description "Reverse sort order"
local modify = parser:command "modify mod m"
- :description "Modifies MIME message"
+ :description "Modifies MIME message"
modify:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
modify:option "-a --add-header"
- :description "Adds specific header"
- :argname "<header=value>"
- :count "*"
+ :description "Adds specific header"
+ :argname "<header=value>"
+ :count "*"
modify:option "-r --remove-header"
- :description "Removes specific header (all occurrences)"
- :argname "<header>"
- :count "*"
+ :description "Removes specific header (all occurrences)"
+ :argname "<header>"
+ :count "*"
modify:option "-R --rewrite-header"
- :description "Rewrites specific header, uses Lua string.format pattern"
- :argname "<header=pattern>"
- :count "*"
+ :description "Rewrites specific header, uses Lua string.format pattern"
+ :argname "<header=pattern>"
+ :count "*"
modify:option "-t --text-footer"
- :description "Adds footer to text/plain parts from a specific file"
- :argname "<file>"
+ :description "Adds footer to text/plain parts from a specific file"
+ :argname "<file>"
modify:option "-H --html-footer"
- :description "Adds footer to text/html parts from a specific file"
- :argname "<file>"
+ :description "Adds footer to text/html parts from a specific file"
+ :argname "<file>"
local strip = parser:command "strip"
- :description "Strip attachments from a message"
+ :description "Strip attachments from a message"
strip:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
strip:flag "-i --keep-images"
- :description "Keep images"
+ :description "Keep images"
strip:option "--min-text-size"
- :description "Minimal text size to keep"
- :argname "<size>"
- :convert(tonumber)
- :default(0)
+ :description "Minimal text size to keep"
+ :argname "<size>"
+ :convert(tonumber)
+ :default(0)
strip:option "--max-text-size"
- :description "Max text size to keep"
- :argname "<size>"
- :convert(tonumber)
- :default(math.huge)
+ :description "Max text size to keep"
+ :argname "<size>"
+ :convert(tonumber)
+ :default(math.huge)
local anonymize = parser:command "anonymize"
- :description "Try to remove sensitive information from a message"
+ :description "Try to remove sensitive information from a message"
anonymize:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
anonymize:option "--exclude-header -X"
- :description "Exclude specific headers from anonymization"
- :argname "<header>"
- :count "*"
+ :description "Exclude specific headers from anonymization"
+ :argname "<header>"
+ :count "*"
anonymize:option "--include-header -I"
- :description "Include specific headers from anonymization"
- :argname "<header>"
- :count "*"
+ :description "Include specific headers from anonymization"
+ :argname "<header>"
+ :count "*"
anonymize:flag "--gpt"
- :description "Use LLM model for anonymization (requires GPT plugin to be configured)"
+ :description "Use LLM model for anonymization (requires GPT plugin to be configured)"
anonymize:option "--model"
- :description "Model to use for anonymization"
- :argname "<model>"
+ :description "Model to use for anonymization"
+ :argname "<model>"
anonymize:option "--prompt"
- :description "Prompt to use for anonymization"
- :argname "<prompt>"
+ :description "Prompt to use for anonymization"
+ :argname "<prompt>"
local sign = parser:command "sign"
- :description "Performs DKIM signing"
+ :description "Performs DKIM signing"
sign:argument "file"
:description "File to process"
:argname "<file>"
@@ -225,33 +225,33 @@ sign:option "-t --type"
:description "ARC or DKIM signing"
:argname("<arc|dkim>")
:convert {
- ['arc'] = 'arc',
- ['dkim'] = 'dkim',
-}
+ ['arc'] = 'arc',
+ ['dkim'] = 'dkim',
+ }
:default 'dkim'
sign:option "-o --output"
:description "Output format"
:argname("<message|signature>")
:convert {
- ['message'] = 'message',
- ['signature'] = 'signature',
-}
+ ['message'] = 'message',
+ ['signature'] = 'signature',
+ }
:default 'message'
local dump = parser:command "dump"
- :description "Dumps a raw message in different formats"
+ :description "Dumps a raw message in different formats"
dump:argument "file"
:description "File to process"
:argname "<file>"
:args "+"
-- Duplicate format for convenience
dump:mutex(
- parser:flag "-j --json"
- :description "JSON output",
- parser:flag "-U --ucl"
- :description "UCL output",
- parser:flag "-M --messagepack"
- :description "MessagePack output"
+ parser:flag "-j --json"
+ :description "JSON output",
+ parser:flag "-U --ucl"
+ :description "UCL output",
+ parser:flag "-M --messagepack"
+ :description "MessagePack output"
)
dump:flag "-s --split"
:description "Split the output file contents such that no content is embedded"
@@ -260,7 +260,7 @@ dump:option "-o --outdir"
:description "Output directory"
:argname("<directory>")
-local function load_config(opts)
+local function load_config(opts, load_tokenizers)
local _r, err = rspamd_config:load_ucl(opts['config'])
if not _r then
@@ -273,6 +273,23 @@ local function load_config(opts)
rspamd_logger.errx('cannot process %s: %s', opts['config'], err)
os.exit(1)
end
+
+ -- Load custom tokenizers if requested
+ if load_tokenizers then
+ local success, tokenizer_err = rspamd_config:load_custom_tokenizers()
+ if not success then
+ rspamd_logger.errx('cannot load custom tokenizers: %s', tokenizer_err or 'unknown error')
+ -- Don't exit here as custom tokenizers are optional
+ rspamd_logger.warnx('proceeding without custom tokenizers')
+ end
+ end
+end
+
+-- Helper function to ensure proper cleanup of tokenizers
+local function cleanup_tokenizers()
+ if rspamd_config then
+ rspamd_config:unload_custom_tokenizers()
+ end
end
local function load_task(_, fname)
@@ -288,13 +305,13 @@ local function load_task(_, fname)
if not res then
parser:error(string.format('cannot read message from %s: %s', fname,
- task))
+ task))
return nil
end
if not task:process_message() then
parser:error(string.format('cannot read message from %s: %s', fname,
- 'failed to parse'))
+ 'failed to parse'))
return nil
end
@@ -335,7 +352,6 @@ local function print_elts(elts, opts, func)
io.write(ucl.to_format(elts, output_fmt(opts)))
else
fun.each(function(fname, elt)
-
if not opts.json and not opts.ucl then
if func then
elt = fun.map(func, elt)
@@ -357,7 +373,7 @@ local function extract_handler(opts)
if opts.words then
-- Enable stemming and urls detection
- load_config(opts)
+ load_config(opts, true) -- Load with custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
rspamd_config:init_subsystem('langdet')
end
@@ -372,39 +388,38 @@ local function extract_handler(opts)
if not opts.json and not opts.ucl then
table.insert(out,
- rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s',
- part:get_mimepart():get_digest():sub(1, 8),
- t,
- part:get_language(),
- part:get_length(), part:get_raw_length(),
- part:get_words_count()))
+ rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s',
+ part:get_mimepart():get_digest():sub(1, 8),
+ t,
+ part:get_language(),
+ part:get_length(), part:get_raw_length(),
+ part:get_words_count()))
table.insert(out,
- rspamd_logger.slog('Stats: %s',
- fun.foldl(function(acc, k, v)
- if acc ~= '' then
- return string.format('%s, %s:%s', acc, k, v)
- else
- return string.format('%s:%s', k, v)
- end
- end, '', part:get_stats())))
+ rspamd_logger.slog('Stats: %s',
+ fun.foldl(function(acc, k, v)
+ if acc ~= '' then
+ return string.format('%s, %s:%s', acc, k, v)
+ else
+ return string.format('%s:%s', k, v)
+ end
+ end, '', part:get_stats())))
end
end
end
local function maybe_print_mime_part_info(part, out)
if opts.part then
-
if not opts.json and not opts.ucl then
local mtype, msubtype = part:get_type()
local det_mtype, det_msubtype = part:get_detected_type()
table.insert(out,
- rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s',
- part:get_digest():sub(1, 8),
- mtype, msubtype,
- det_mtype, det_msubtype,
- part:get_filename(),
- part:get_detected_ext(),
- part:get_length()))
+ rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s',
+ part:get_digest():sub(1, 8),
+ mtype, msubtype,
+ det_mtype, det_msubtype,
+ part:get_filename(),
+ part:get_detected_ext(),
+ part:get_length()))
end
end
end
@@ -416,17 +431,17 @@ local function extract_handler(opts)
return table.concat(words, ' ')
else
return table.concat(
- fun.totable(
- fun.map(function(w)
- -- [1] - stemmed word
- -- [2] - normalised word
- -- [3] - raw word
- -- [4] - flags (table of strings)
- return string.format('%s|%s|%s(%s)',
- w[3], w[2], w[1], table.concat(w[4], ','))
- end, words)
- ),
- ' '
+ fun.totable(
+ fun.map(function(w)
+ -- [1] - stemmed word
+ -- [2] - normalised word
+ -- [3] - raw word
+ -- [4] - flags (table of strings)
+ return string.format('%s|%s|%s(%s)',
+ w[3], w[2], w[1], table.concat(w[4], ','))
+ end, words)
+ ),
+ ' '
)
end
end
@@ -443,7 +458,7 @@ local function extract_handler(opts)
if opts.words then
local how_words = opts['words_format'] or 'stem'
table.insert(out_elts[fname], 'meta_words: ' ..
- print_words(task:get_meta_words(how_words), how_words == 'full'))
+ print_words(task:get_meta_words(how_words), how_words == 'full'))
end
if opts.text or opts.html then
@@ -466,7 +481,7 @@ local function extract_handler(opts)
if opts.words then
local how_words = opts['words_format'] or 'stem'
table.insert(out_elts[fname], print_words(part:get_words(how_words),
- how_words == 'full'))
+ how_words == 'full'))
else
table.insert(out_elts[fname], tostring(part:get_content(how)))
end
@@ -480,7 +495,7 @@ local function extract_handler(opts)
if opts.words then
local how_words = opts['words_format'] or 'stem'
table.insert(out_elts[fname], print_words(part:get_words(how_words),
- how_words == 'full'))
+ how_words == 'full'))
else
if opts.structure then
local hc = part:get_html()
@@ -489,11 +504,11 @@ local function extract_handler(opts)
local fun = require "fun"
if type(elt) == 'table' then
return table.concat(fun.totable(
- fun.map(
- function(t)
- return rspamd_logger.slog("%s", t)
- end,
- elt)), '\n')
+ fun.map(
+ function(t)
+ return rspamd_logger.slog("%s", t)
+ end,
+ elt)), '\n')
else
return rspamd_logger.slog("%s", elt)
end
@@ -524,7 +539,7 @@ local function extract_handler(opts)
if opts.invisible then
local hc = part:get_html()
table.insert(out_elts[fname], string.format('invisible content: %s',
- tostring(hc:get_invisible())))
+ tostring(hc:get_invisible())))
end
end
end
@@ -544,13 +559,18 @@ local function extract_handler(opts)
for _, task in ipairs(tasks) do
task:destroy()
end
+
+ -- Cleanup custom tokenizers if they were loaded
+ if opts.words then
+ cleanup_tokenizers()
+ end
end
local function stat_handler(opts)
local fun = require "fun"
local out_elts = {}
- load_config(opts)
+ load_config(opts, true) -- Load with custom tokenizers for stat generation
rspamd_url.init(rspamd_config:get_tld_path())
rspamd_config:init_subsystem('langdet,stat') -- Needed to gen stat tokens
@@ -571,10 +591,10 @@ local function stat_handler(opts)
out_elts[fname] = bt
process_func = function(e)
return string.format('%s (%d): "%s"+"%s", [%s]', e.data, e.win, e.t1 or "",
- e.t2 or "", table.concat(fun.totable(
- fun.map(function(k)
- return k
- end, e.flags)), ","))
+ e.t2 or "", table.concat(fun.totable(
+ fun.map(function(k)
+ return k
+ end, e.flags)), ","))
end
elseif opts.fuzzy then
local parts = task:get_parts() or {}
@@ -601,16 +621,16 @@ local function stat_handler(opts)
digest = digest,
shingles = shingles,
type = string.format('%s/%s',
- ({ part:get_type() })[1],
- ({ part:get_type() })[2])
+ ({ part:get_type() })[1],
+ ({ part:get_type() })[2])
})
else
table.insert(out_elts[fname], {
digest = part:get_digest(),
file = part:get_filename(),
type = string.format('%s/%s',
- ({ part:get_type() })[1],
- ({ part:get_type() })[2])
+ ({ part:get_type() })[1],
+ ({ part:get_type() })[2])
})
end
end
@@ -621,10 +641,13 @@ local function stat_handler(opts)
end
print_elts(out_elts, opts, process_func)
+
+ -- Cleanup custom tokenizers
+ cleanup_tokenizers()
end
local function urls_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- URLs don't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
local out_elts = {}
@@ -764,7 +787,7 @@ local function newline(task)
end
local function modify_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Modification doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
local function read_file(file)
@@ -804,10 +827,10 @@ local function modify_handler(opts)
if hname == name then
local new_value = string.format(hpattern, hdr.decoded)
new_value = string.format('%s:%s%s',
- name, hdr.separator,
- rspamd_util.fold_header(name,
- rspamd_util.mime_header_encode(new_value),
- task:get_newlines_type()))
+ name, hdr.separator,
+ rspamd_util.fold_header(name,
+ rspamd_util.mime_header_encode(new_value),
+ task:get_newlines_type()))
out[#out + 1] = new_value
return
end
@@ -816,12 +839,12 @@ local function modify_handler(opts)
if rewrite.need_rewrite_ct then
if name:lower() == 'content-type' then
local nct = string.format('%s: %s/%s; charset=utf-8',
- 'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype)
+ 'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype)
out[#out + 1] = nct
return
elseif name:lower() == 'content-transfer-encoding' then
out[#out + 1] = string.format('%s: %s',
- 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
+ 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
seen_cte = true
return
end
@@ -837,13 +860,13 @@ local function modify_handler(opts)
if hname and hvalue then
out[#out + 1] = string.format('%s: %s', hname,
- rspamd_util.fold_header(hname, hvalue, task:get_newlines_type()))
+ rspamd_util.fold_header(hname, hvalue, task:get_newlines_type()))
end
end
if not seen_cte and rewrite.need_rewrite_ct then
out[#out + 1] = string.format('%s: %s',
- 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
+ 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
end
-- End of headers
@@ -883,7 +906,7 @@ local function modify_handler(opts)
end
local function sign_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Signing doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
local lua_dkim = require("lua_ffi").dkim
@@ -927,11 +950,11 @@ local function sign_handler(opts)
io.flush()
else
local dkim_hdr = string.format('%s: %s%s',
- 'DKIM-Signature',
- rspamd_util.fold_header('DKIM-Signature',
- rspamd_util.mime_header_encode(sig),
- task:get_newlines_type()),
- newline(task))
+ 'DKIM-Signature',
+ rspamd_util.fold_header('DKIM-Signature',
+ rspamd_util.mime_header_encode(sig),
+ task:get_newlines_type()),
+ newline(task))
io.write(dkim_hdr)
io.flush()
task:get_content():save_in_file(1)
@@ -942,7 +965,7 @@ local function sign_handler(opts)
end
local function strip_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Stripping doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
for _, fname in ipairs(opts.file) do
@@ -998,7 +1021,7 @@ local function strip_handler(opts)
end
local function anonymize_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Anonymization doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
for _, fname in ipairs(opts.file) do
@@ -1103,7 +1126,7 @@ local function get_dump_content(task, opts, fname)
end
local function dump_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Dumping doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
for _, fname in ipairs(opts.file) do
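
The mime.lua changes thread a load_tokenizers flag through load_config() and pair it with cleanup_tokenizers(), so only the word- and stat-related subcommands pay for loading optional custom tokenizers. A sketch of the intended lifecycle, assuming a hypothetical handler:

local function words_like_handler(opts)
  load_config(opts, true) -- also attempts to load optional custom tokenizers
  rspamd_url.init(rspamd_config:get_tld_path())
  rspamd_config:init_subsystem('langdet,stat')

  -- ... build tasks and print the extracted words/tokens ...

  cleanup_tokenizers() -- unload custom tokenizers once processing is done
end
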