about summary refs log tree commit diff stats
path: root/lualib
diff options
context:
space:
mode:
Diffstat (limited to 'lualib')
-rw-r--r-- lualib/lua_bayes_redis.lua | 67
-rw-r--r-- lualib/lua_cache.lua | 475
-rw-r--r-- lualib/lua_cfg_transform.lua | 22
-rw-r--r-- lualib/lua_dkim_tools.lua | 112
-rw-r--r-- lualib/lua_magic/patterns.lua | 17
-rw-r--r-- lualib/lua_magic/types.lua | 7
-rw-r--r-- lualib/lua_maps.lua | 100
-rw-r--r-- lualib/lua_maps_expressions.lua | 2
-rw-r--r-- lualib/lua_mime.lua | 24
-rw-r--r-- lualib/lua_mime_types.lua | 2
-rw-r--r-- lualib/lua_redis.lua | 44
-rw-r--r-- lualib/lua_scanners/cloudmark.lua | 49
-rw-r--r-- lualib/lua_scanners/icap.lua | 9
-rw-r--r-- lualib/lua_util.lua | 51
-rw-r--r-- lualib/plugins/neural.lua | 2
-rw-r--r-- lualib/plugins/rbl.lua | 5
-rw-r--r-- lualib/redis_scripts/bayes_cache_learn.lua | 17
-rw-r--r-- lualib/redis_scripts/bayes_classify.lua | 75
-rw-r--r-- lualib/redis_scripts/bayes_learn.lua | 55
-rw-r--r-- lualib/redis_scripts/neural_save_unlock.lua | 13
-rw-r--r-- lualib/rspamadm/dmarc_report.lua | 18
-rw-r--r-- lualib/rspamadm/mime.lua | 401
-rw-r--r-- lualib/rspamadm/statistics_dump.lua | 20
23 files changed, 1160 insertions, 427 deletions
diff --git a/lualib/lua_bayes_redis.lua b/lualib/lua_bayes_redis.lua
index 782e6fc47..a7af80bf1 100644
--- a/lualib/lua_bayes_redis.lua
+++ b/lualib/lua_bayes_redis.lua
@@ -25,27 +25,44 @@ local ucl = require "ucl"
local N = "bayes"
local function gen_classify_functor(redis_params, classify_script_id)
- return function(task, expanded_key, id, is_spam, stat_tokens, callback)
-
+ return function(task, expanded_key, id, class_labels, stat_tokens, callback)
local function classify_redis_cb(err, data)
lua_util.debugm(N, task, 'classify redis cb: %s, %s', err, data)
if err then
callback(task, false, err)
else
- callback(task, true, data[1], data[2], data[3], data[4])
+ -- Pass the raw data table to the C++ callback for processing
+ -- The C++ callback will handle both binary and multi-class formats
+ callback(task, true, data)
+ end
+ end
+
+ -- Determine class labels to send to Redis script
+ local script_class_labels
+ if type(class_labels) == "table" then
+ -- Use simple comma-separated string instead of messagepack
+ script_class_labels = "TABLE:" .. table.concat(class_labels, ",")
+ else
+ -- Single class label or boolean compatibility
+ if class_labels == true or class_labels == "true" then
+ script_class_labels = "S" -- spam
+ elseif class_labels == false or class_labels == "false" then
+ script_class_labels = "H" -- ham
+ else
+ script_class_labels = class_labels -- string class label
end
end
lua_redis.exec_redis_script(classify_script_id,
{ task = task, is_write = false, key = expanded_key },
- classify_redis_cb, { expanded_key, stat_tokens })
+ classify_redis_cb, { expanded_key, script_class_labels, stat_tokens })
end
end
local function gen_learn_functor(redis_params, learn_script_id)
- return function(task, expanded_key, id, is_spam, symbol, is_unlearn, stat_tokens, callback, maybe_text_tokens)
+ return function(task, expanded_key, id, class_label, symbol, is_unlearn, stat_tokens, callback, maybe_text_tokens)
local function learn_redis_cb(err, data)
- lua_util.debugm(N, task, 'learn redis cb: %s, %s', err, data)
+ lua_util.debugm(N, task, 'learn redis cb: %s, %s for class %s', err, data, class_label)
if err then
callback(task, false, err)
else
@@ -53,17 +70,24 @@ local function gen_learn_functor(redis_params, learn_script_id)
end
end
+ -- Convert class_label for backward compatibility
+ local script_class_label = class_label
+ if class_label == true or class_label == "true" then
+ script_class_label = "S" -- spam
+ elseif class_label == false or class_label == "false" then
+ script_class_label = "H" -- ham
+ end
+
if maybe_text_tokens then
lua_redis.exec_redis_script(learn_script_id,
{ task = task, is_write = true, key = expanded_key },
learn_redis_cb,
- { expanded_key, tostring(is_spam), symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens })
+ { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens })
else
lua_redis.exec_redis_script(learn_script_id,
{ task = task, is_write = true, key = expanded_key },
- learn_redis_cb, { expanded_key, tostring(is_spam), symbol, tostring(is_unlearn), stat_tokens })
+ learn_redis_cb, { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens })
end
-
end
end
@@ -112,8 +136,7 @@ end
--- @param classifier_ucl ucl of the classifier config
--- @param statfile_ucl ucl of the statfile config
--- @return a pair of (classify_functor, learn_functor) or `nil` in case of error
-exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol, is_spam, ev_base, stat_periodic_cb)
-
+exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol, class_label, ev_base, stat_periodic_cb)
local redis_params = load_redis_params(classifier_ucl, statfile_ucl)
if not redis_params then
@@ -137,7 +160,6 @@ exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol,
if ev_base then
rspamd_config:add_periodic(ev_base, 0.0, function(cfg, _)
-
local function stat_redis_cb(err, data)
lua_util.debugm(N, cfg, 'stat redis cb: %s, %s', err, data)
@@ -162,11 +184,22 @@ exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol,
end
end
+ -- Convert class_label to learn key
+ local learn_key
+ if class_label == true or class_label == "true" or class_label == "S" then
+ learn_key = "learns_spam"
+ elseif class_label == false or class_label == "false" or class_label == "H" then
+ learn_key = "learns_ham"
+ else
+ -- For other class labels, use learns_<class_label>
+ learn_key = "learns_" .. string.lower(tostring(class_label))
+ end
+
lua_redis.exec_redis_script(stat_script_id,
{ ev_base = ev_base, cfg = cfg, is_write = false },
stat_redis_cb, { tostring(cursor),
symbol,
- is_spam and "learns_spam" or "learns_ham",
+ learn_key,
tostring(max_users) })
return statfile_ucl.monitor_timeout or classifier_ucl.monitor_timeout or 30.0
end)
@@ -178,7 +211,6 @@ end
local function gen_cache_check_functor(redis_params, check_script_id, conf)
local packed_conf = ucl.to_format(conf, 'msgpack')
return function(task, cache_id, callback)
-
local function classify_redis_cb(err, data)
lua_util.debugm(N, task, 'check cache redis cb: %s, %s (%s)', err, data, type(data))
if err then
@@ -201,17 +233,16 @@ end
local function gen_cache_learn_functor(redis_params, learn_script_id, conf)
local packed_conf = ucl.to_format(conf, 'msgpack')
- return function(task, cache_id, is_spam)
+ return function(task, cache_id, class_name, class_id)
local function learn_redis_cb(err, data)
lua_util.debugm(N, task, 'learn_cache redis cb: %s, %s', err, data)
end
- lua_util.debugm(N, task, 'try to learn cache: %s', cache_id)
+ lua_util.debugm(N, task, 'try to learn cache: %s as %s (id=%s)', cache_id, class_name, class_id)
lua_redis.exec_redis_script(learn_script_id,
{ task = task, is_write = true, key = cache_id },
learn_redis_cb,
- { cache_id, is_spam and "1" or "0", packed_conf })
-
+ { cache_id, tostring(class_id), packed_conf })
end
end
diff --git a/lualib/lua_cache.lua b/lualib/lua_cache.lua
new file mode 100644
index 000000000..c87a9dc78
--- /dev/null
+++ b/lualib/lua_cache.lua
@@ -0,0 +1,475 @@
+--[[
+Copyright (c) 2025, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+--[[[
+-- @module lua_cache
+-- This module provides a Redis-based caching API for Rspamd with support for
+-- concurrent operations across multiple workers. It includes features like
+-- distributed locking via PENDING markers, automatic key hashing,
+-- configurable serialization formats, and TTL management.
+--
+@example
+local redis_cache = require "lua_cache"
+local redis_params = redis_lib.parse_redis_server('reputation')
+
+-- Create cache context
+local cache_context = redis_cache.create_cache_context(redis_params, {
+ cache_prefix = "rspamd_reputation",
+ cache_ttl = 86400, -- 1 day
+ cache_format = "json",
+ cache_hash_len = 16,
+ cache_use_hashing = true
+})
+
+-- Example usage in a task
+local function process_url_reputation(task, url)
+ local cache_key = url:get_tld()
+
+ -- Try to get data from cache first
+ redis_cache.cache_get(task, cache_key, cache_context, 5.0,
+ -- This callback is called on cache miss
+ function(task)
+ -- Perform expensive reputation lookup
+ local reputation = calculate_reputation(task, url)
+
+ -- Store result in cache for future use
+ redis_cache.cache_set(task, cache_key, {
+ score = reputation.score,
+ categories = reputation.categories,
+ timestamp = os.time()
+ }, cache_context)
+
+ -- Use the result
+ apply_reputation_rules(task, url, reputation)
+ end,
+ -- This callback is called when cache data is available
+ function(task, err, data)
+ if err then
+ logger.errx(task, "Cache error for %s: %s", cache_key, err)
+ return
+ end
+
+ -- Use the cached data
+ apply_reputation_rules(task, url, data)
+ end
+ )
+end
+--]]
+
+local logger = require "rspamd_logger"
+local ucl = require "ucl"
+local lua_util = require "lua_util"
+local rspamd_util = require "rspamd_util"
+local lua_redis = require "lua_redis"
+local hasher = require "rspamd_cryptobox_hash"
+
+local N = "lua_cache"
+local exports = {}
+
+-- Default options
+local default_opts = {
+ cache_prefix = "rspamd_cache",
+ cache_ttl = 3600, -- 1 hour
+ cache_probes = 5, -- Number of times to check a pending key
+ cache_format = "json", -- Serialization format
+ cache_hash_len = 16, -- Number of hex symbols to use for hashed keys
+ cache_use_hashing = false -- Whether to hash keys by default
+}
+
+-- Hash `key` and return the hex digest, optionally shortened.
+-- The digest is cut to its first `hash_len` characters only when `hash_len`
+-- is a positive number smaller than the digest length; in every other case
+-- the whole digest is returned.
+local function hash_key(key, hash_len)
+  local digest = hasher.create(key):hex()
+  local want_prefix = hash_len and hash_len > 0 and hash_len < #digest
+
+  if not want_prefix then
+    return digest
+  end
+
+  return string.sub(digest, 1, hash_len)
+end
+
+-- Resolve the key that is stored in Redis for `raw_key`.
+-- `force_hashing` (true or false) overrides the context setting; only when it
+-- is nil does the context's `cache_use_hashing` option decide. Hashed keys are
+-- produced by hash_key() using the context's `cache_hash_len`.
+local function get_cache_key(raw_key, cache_context, force_hashing)
+  local use_hash = force_hashing
+  if use_hash == nil then
+    use_hash = cache_context.opts.cache_use_hashing
+  end
+
+  if not use_hash then
+    return raw_key
+  end
+
+  lua_util.debugm(N, rspamd_config, "hashing key '%s' with hash length %s",
+      raw_key, cache_context.opts.cache_hash_len)
+  return hash_key(raw_key, cache_context.opts.cache_hash_len)
+end
+
+-- Create a caching context with the provided options
+-- @param redis_params Redis connection parameters; required
+-- @param opts optional table of `cache_*` overrides merged over `default_opts`.
+--   NOTE: the `cache_*` keys are removed from this table before returning,
+--   i.e. the caller's table is modified in place
+-- @param module_name optional name used for this context's debug messages
+--   (falls back to this module's name)
+-- @return the cache context table (fields: redis_params, opts, N, encode,
+--   decode), or `nil` plus an error string when `redis_params` is missing
+local function create_cache_context(redis_params, opts, module_name)
+ if not redis_params then
+ return nil, "Redis parameters must be provided"
+ end
+
+ local cache_context = {}
+ cache_context.redis_params = redis_params
+
+ -- Process and merge configuration options
+ cache_context.opts = lua_util.override_defaults(default_opts, opts)
+ cache_context.N = module_name or N
+
+ -- Register Redis prefix
+ lua_redis.register_prefix(cache_context.opts.cache_prefix,
+ "caching",
+ "Cache API prefix")
+
+ lua_util.debugm(N, rspamd_config, "registered redis prefix: %s", cache_context.opts.cache_prefix)
+
+ -- Remove cache related options from opts table
+ -- (they have already been merged into cache_context.opts above)
+ if opts then
+ lua_util.debugm(N, rspamd_config, "removing cache options from original opts table")
+ opts.cache_prefix = nil
+ opts.cache_ttl = nil
+ opts.cache_probes = nil
+ opts.cache_format = nil
+ opts.cache_hash_len = nil
+ opts.cache_use_hashing = nil
+ end
+
+ -- Set serialization and deserialization functions
+ -- Only the exact value "messagepack" selects msgpack; any other
+ -- cache_format value falls through to the JSON branch below
+ if cache_context.opts.cache_format == "messagepack" then
+ lua_util.debugm(cache_context.N, rspamd_config, "using messagepack for serialization")
+
+ cache_context.encode = function(data)
+ return ucl.to_format(data, 'msgpack')
+ end
+
+ -- Returns the parsed object, or nil when the input cannot be parsed
+ cache_context.decode = function(raw_data)
+ local ucl_parser = ucl.parser()
+ local ok, ucl_err = ucl_parser:parse_text(raw_data, 'messagepack')
+ if not ok then
+ lua_util.debugm(cache_context.N, rspamd_config, "failed to parse messagepack data: %s", ucl_err)
+ return nil
+ end
+ return ucl_parser:get_object()
+ end
+ else
+ -- Default to JSON
+ lua_util.debugm(cache_context.N, rspamd_config, "using json for serialization")
+
+ cache_context.encode = function(data)
+ return ucl.to_format(data, 'json')
+ end
+
+ -- Returns the parsed object, or nil when the input cannot be parsed
+ cache_context.decode = function(raw_data)
+ local ucl_parser = ucl.parser()
+ local ok, ucl_err = ucl_parser:parse_text(raw_data)
+ if not ok then
+ lua_util.debugm(cache_context.N, rspamd_config, "failed to parse json data: %s", ucl_err)
+ return nil
+ end
+ return ucl_parser:get_object()
+ end
+ end
+
+ lua_util.debugm(cache_context.N, rspamd_config, "cache context created: %s", cache_context.opts)
+ return cache_context
+end
+
+-- Serialize `data` with the encoder attached to this cache context
+local function encode_data(data, cache_context)
+  local format_name = cache_context.opts.cache_format
+  lua_util.debugm(cache_context.N, rspamd_config, "encoding data using %s format", format_name)
+
+  local encode = cache_context.encode
+  return encode(data)
+end
+
+-- Deserialize `data` with the decoder attached to this cache context.
+-- A nil/false `data` is not passed to the decoder; nil is returned instead.
+local function decode_data(data, cache_context)
+  if data then
+    lua_util.debugm(cache_context.N, rspamd_config, "decoding data using %s format",
+        cache_context.opts.cache_format)
+    return cache_context.decode(data)
+  end
+
+  lua_util.debugm(cache_context.N, rspamd_config, "cannot decode nil data")
+  return nil
+end
+
+-- Recognise a PENDING marker and return its decoded payload.
+-- Returns nil when `value` is not a string or does not begin with "PENDING:";
+-- otherwise returns whatever decode_data() yields for the text after the prefix.
+local function parse_pending_value(value, cache_context)
+  local marker_prefix = "PENDING:"
+
+  if type(value) ~= 'string' then
+    lua_util.debugm(cache_context.N, rspamd_config, "value is not a string, cannot be a pending marker")
+    return nil
+  end
+
+  local head = string.sub(value, 1, #marker_prefix)
+  if head ~= marker_prefix then
+    lua_util.debugm(cache_context.N, rspamd_config, "value doesn't start with PENDING: prefix")
+    return nil
+  end
+
+  lua_util.debugm(cache_context.N, rspamd_config, "found PENDING marker, extracting data")
+  local payload = string.sub(value, #marker_prefix + 1)
+  return decode_data(payload, cache_context)
+end
+
+-- Build the string stored in Redis while a value is being computed:
+-- "PENDING:" followed by the encoded table {hostname, timeout, timestamp}
+local function create_pending_marker(timeout, cache_context)
+  local this_host = rspamd_util.get_hostname()
+  local marker_payload = {
+    hostname = this_host,
+    timeout = timeout,
+    timestamp = os.time(),
+  }
+
+  lua_util.debugm(cache_context.N, rspamd_config, "creating PENDING marker for host %s, timeout %s",
+      this_host, timeout)
+
+  local encoded = encode_data(marker_payload, cache_context)
+  return "PENDING:" .. encoded
+end
+
+-- Build the Redis key for `key`: "<cache_prefix>_<key>", where <key> is the
+-- raw key or its hash. No hashing override is passed to get_cache_key(), so
+-- the context's `cache_use_hashing` option decides (passing an explicit
+-- `false` here would disable hashing unconditionally).
+local function make_full_key(key, cache_context)
+  return cache_context.opts.cache_prefix .. "_" .. get_cache_key(key, cache_context)
+end
+
+-- Render a TTL as the positive integer number of seconds that Redis SETEX
+-- requires; fractional values are rounded up and the minimum is 1 second
+local function format_ttl(seconds)
+  return string.format('%d', math.max(1, math.ceil(seconds)))
+end
+
+-- Check cache and handle the result appropriately
+-- * cache miss: a PENDING marker is stored (TTL = 2 * timeout) and
+--   `callback_uncached(task)` is called
+-- * key is PENDING (another worker computes it): the key is re-read every
+--   timeout / cache_probes seconds until data appears
+--   (`callback_data(task, nil, data)`), the key disappears
+--   (`callback_uncached(task)`) or the probes are exhausted
+--   (`callback_data(task, "timeout waiting for pending key", nil)`)
+-- * data found: its TTL is extended to `cache_ttl` and
+--   `callback_data(task, nil, data)` is called
+-- * Redis error on the initial lookup: `callback_uncached(task)` is called
+-- Returns false when a required argument is missing, true otherwise.
+local function cache_get(task, key, cache_context, timeout, callback_uncached, callback_data)
+  if not task or not key or not cache_context or not callback_uncached or not callback_data then
+    logger.errx(task, "missing required parameters for cache_get")
+    return false
+  end
+
+  local full_key = make_full_key(key, cache_context)
+  -- Single source of truth for the probe limit (interval and stop condition)
+  local max_probes = cache_context.opts.cache_probes or 5
+  lua_util.debugm(cache_context.N, task, "cache lookup for key: %s (%s)", key, full_key)
+
+  -- Function to check a pending key
+  local function check_pending(pending_info)
+    local probe_count = 0
+    local probe_interval = timeout / max_probes
+
+    lua_util.debugm(cache_context.N, task, "setting up probes for pending key %s, interval: %s seconds",
+        full_key, probe_interval)
+
+    -- Set up a timer to probe the key
+    local function probe_key()
+      probe_count = probe_count + 1
+      lua_util.debugm(cache_context.N, task, "probe #%s/%s for pending key %s",
+          probe_count, max_probes, full_key)
+
+      if probe_count >= max_probes then
+        logger.infox(task, "maximum probes reached for key %s, considering it failed", full_key)
+        lua_util.debugm(cache_context.N, task, "maximum probes reached for key %s, giving up", full_key)
+        callback_data(task, "timeout waiting for pending key", nil)
+        return
+      end
+
+      lua_util.debugm(cache_context.N, task, "probing redis for key %s", full_key)
+      lua_redis.redis_make_request(task, cache_context.redis_params, key, false,
+          function(err, data)
+            if err then
+              logger.errx(task, "redis error while probing key %s: %s", full_key, err)
+              lua_util.debugm(cache_context.N, task, "redis error during probe: %s, retrying later", err)
+              task:add_timer(probe_interval, probe_key)
+              return
+            end
+
+            -- A missing key is reported as nil or as a userdata placeholder
+            if not data or type(data) == 'userdata' then
+              lua_util.debugm(cache_context.N, task, "pending key %s disappeared, calling uncached handler", full_key)
+              callback_uncached(task)
+              return
+            end
+
+            local pending = parse_pending_value(data, cache_context)
+            if pending then
+              lua_util.debugm(cache_context.N, task, "key %s still pending (host: %s), retrying later",
+                  full_key, pending.hostname)
+              task:add_timer(probe_interval, probe_key)
+            else
+              lua_util.debugm(cache_context.N, task, "pending key %s resolved to actual data", full_key)
+              callback_data(task, nil, decode_data(data, cache_context))
+            end
+          end,
+          'GET', { full_key }
+      )
+    end
+
+    -- Start the first probe after the initial probe interval
+    lua_util.debugm(cache_context.N, task, "scheduling first probe for %s in %s seconds",
+        full_key, probe_interval)
+    task:add_timer(probe_interval, probe_key)
+  end
+
+  -- Initial cache lookup
+  lua_util.debugm(cache_context.N, task, "making initial redis GET request for key: %s", full_key)
+  lua_redis.redis_make_request(task, cache_context.redis_params, key, false,
+      function(err, data)
+        if err then
+          logger.errx(task, "redis error looking up key %s: %s", full_key, err)
+          lua_util.debugm(cache_context.N, task, "redis error: %s, calling uncached handler", err)
+          callback_uncached(task)
+          return
+        end
+
+        if not data or type(data) == 'userdata' then
+          -- Key not found, set pending and call the uncached callback
+          lua_util.debugm(cache_context.N, task, "key %s not found in cache, creating pending marker", full_key)
+          local pending_marker = create_pending_marker(timeout, cache_context)
+          -- SETEX only accepts integer seconds; `timeout` may be fractional
+          local pending_ttl = format_ttl(timeout * 2)
+
+          lua_util.debugm(cache_context.N, task, "setting pending marker for key %s with TTL %s",
+              full_key, pending_ttl)
+          lua_redis.redis_make_request(task, cache_context.redis_params, key, true,
+              function(set_err, set_data)
+                if set_err then
+                  logger.errx(task, "redis error setting pending marker for %s: %s", full_key, set_err)
+                  lua_util.debugm(cache_context.N, task, "failed to set pending marker: %s", set_err)
+                else
+                  lua_util.debugm(cache_context.N, task, "successfully set pending marker for %s", full_key)
+                end
+                -- The uncached handler runs whether or not the marker was stored
+                lua_util.debugm(cache_context.N, task, "calling uncached handler for %s", full_key)
+                callback_uncached(task)
+              end,
+              'SETEX', { full_key, pending_ttl, pending_marker }
+          )
+        else
+          -- Key found, check if it's a pending marker or actual data
+          local pending = parse_pending_value(data, cache_context)
+
+          if pending then
+            -- Key is being processed by another worker
+            lua_util.debugm(cache_context.N, task, "key %s is pending on host %s, waiting for result",
+                full_key, pending.hostname)
+            check_pending(pending)
+          else
+            -- Extend TTL and return data
+            lua_util.debugm(cache_context.N, task, "found cached data for key %s, extending TTL to %s",
+                full_key, cache_context.opts.cache_ttl)
+            lua_redis.redis_make_request(task, cache_context.redis_params, key, true,
+                function(expire_err, _)
+                  if expire_err then
+                    logger.errx(task, "redis error extending TTL for %s: %s", full_key, expire_err)
+                    lua_util.debugm(cache_context.N, task, "failed to extend TTL: %s", expire_err)
+                  else
+                    lua_util.debugm(cache_context.N, task, "successfully extended TTL for %s", full_key)
+                  end
+                end,
+                'EXPIRE', { full_key, tostring(cache_context.opts.cache_ttl) }
+            )
+
+            lua_util.debugm(cache_context.N, task, "returning cached data for key %s", full_key)
+            callback_data(task, nil, decode_data(data, cache_context))
+          end
+        end
+      end,
+      'GET', { full_key }
+  )
+
+  return true
+end
+
+-- Save data to the cache
+-- Encodes `data` with the context's serializer and stores it under the same
+-- Redis key cache_get() uses, with a TTL of `cache_ttl` seconds.
+-- Returns false when a required argument is missing, otherwise the result of
+-- lua_redis.redis_make_request().
+local function cache_set(task, key, data, cache_context)
+  if not task or not key or not data or not cache_context then
+    logger.errx(task, "missing required parameters for cache_set")
+    return false
+  end
+
+  local full_key = make_full_key(key, cache_context)
+  lua_util.debugm(cache_context.N, task, "caching data for key: %s (%s) with TTL: %s",
+      full_key, key, cache_context.opts.cache_ttl)
+
+  local encoded_data = encode_data(data, cache_context)
+
+  -- Store the data with expiration
+  lua_util.debugm(cache_context.N, task, "making redis SETEX request for key: %s", full_key)
+  return lua_redis.redis_make_request(task, cache_context.redis_params, key, true,
+      function(err, result)
+        if err then
+          logger.errx(task, "redis error setting cached data for %s: %s", full_key, err)
+          lua_util.debugm(cache_context.N, task, "failed to cache data: %s", err)
+        else
+          lua_util.debugm(cache_context.N, task, "successfully cached data for key %s", full_key)
+        end
+      end,
+      'SETEX', { full_key, tostring(cache_context.opts.cache_ttl), encoded_data }
+  )
+end
+
+-- Delete a cache entry
+-- Removes the same Redis key cache_get()/cache_set() use for `key`.
+-- Returns false when a required argument is missing, otherwise the result of
+-- lua_redis.redis_make_request().
+local function cache_del(task, key, cache_context)
+  if not task or not key or not cache_context then
+    logger.errx(task, "missing required parameters for cache_del")
+    return false
+  end
+
+  local full_key = make_full_key(key, cache_context)
+  lua_util.debugm(cache_context.N, task, "deleting cache key: %s", full_key)
+
+  return lua_redis.redis_make_request(task, cache_context.redis_params, key, true,
+      function(err, result)
+        if err then
+          logger.errx(task, "redis error deleting cache key %s: %s", full_key, err)
+          lua_util.debugm(cache_context.N, task, "failed to delete cache key: %s", err)
+        else
+          local count = tonumber(result) or 0
+          lua_util.debugm(cache_context.N, task, "successfully deleted cache key %s (%s keys removed)",
+              full_key, count)
+        end
+      end,
+      'DEL', { full_key }
+  )
+end
+
+-- Export the API functions
+---[[[
+-- @function lua_cache.create_cache_context(redis_params, opts, module_name)
+-- Creates a Redis caching context with specified parameters and options
+-- @param {table} redis_params Redis connection parameters (required)
+-- @param {table} opts Optional configuration parameters:
+-- * `cache_prefix`: Key prefix for Redis (default: "rspamd_cache")
+-- * `cache_ttl`: TTL in seconds for cached entries (default: 3600)
+-- * `cache_probes`: Number of times to check pending keys (default: 5)
+-- * `cache_format`: Serialization format - "json" or "messagepack" (default: "json")
+-- * `cache_hash_len`: Number of hex symbols for hashed keys (default: 16)
+-- * `cache_use_hashing`: Whether to hash keys by default (default: false)
+-- @return {table} Cache context or nil + error message on failure
+--]]
+exports.create_cache_context = create_cache_context
+---[[[
+-- @function lua_cache.cache_get(task, key, cache_context, timeout, callback_uncached, callback_data)
+-- Retrieves data from cache, handling pending states and cache misses appropriately
+-- @param {rspamd_task} task Current task (required)
+-- @param {string} key Cache key (required)
+-- @param {table} cache_context Redis cache context from create_cache_context (required)
+-- @param {number} timeout Timeout for pending operations in seconds (required)
+-- @param {function} callback_uncached Function to call on cache miss: callback_uncached(task) (required)
+-- @param {function} callback_data Function to call when data is available: callback_data(task, err, data) (required)
+-- @return {boolean} true if request was initiated successfully, false otherwise
+--]]
+exports.cache_get = cache_get
+---[[[
+-- @function lua_cache.cache_set(task, key, data, cache_context)
+-- Stores data in the cache with the configured TTL
+-- @param {rspamd_task} task Current task (required)
+-- @param {string} key Cache key (required)
+-- @param {table} data Data to store in the cache (required)
+-- @param {table} cache_context Redis cache context from create_cache_context (required)
+-- @return {boolean} true if request was initiated successfully, false otherwise
+--]]
+exports.cache_set = cache_set
+---[[[
+-- @function lua_cache.cache_del(task, key, cache_context)
+-- Deletes data from the cache
+-- @param {rspamd_task} task Current task (required)
+-- @param {string} key Cache key (required)
+-- @param {table} cache_context Redis cache context from create_cache_context (required)
+-- @return {boolean} true if request was initiated successfully, false otherwise
+--]]
+exports.cache_del = cache_del
+
+return exports
diff --git a/lualib/lua_cfg_transform.lua b/lualib/lua_cfg_transform.lua
index 265ca34c0..ec11ef299 100644
--- a/lualib/lua_cfg_transform.lua
+++ b/lualib/lua_cfg_transform.lua
@@ -198,20 +198,22 @@ end
local function symbol_transform(cfg, k, v)
local groups = cfg:at('group')
- -- first try to find any group where there is a definition of this symbol
- for gr_n, gr in groups:pairs() do
- local symbols = gr:at('symbols')
- if symbols and symbols:at(k) then
- -- We override group symbol with ungrouped symbol
- logger.infox("overriding group symbol %s in the group %s", k, gr_n)
- symbols[k] = lua_util.override_defaults(symbols:at(k):unwrap(), v:unwrap())
- return
+ if groups then
+ -- first try to find any group where there is a definition of this symbol
+ for gr_n, gr in groups:pairs() do
+ local symbols = gr:at('symbols')
+ if symbols and symbols:at(k) then
+ -- We override group symbol with ungrouped symbol
+ logger.infox("overriding group symbol %s in the group %s", k, gr_n)
+ symbols[k] = lua_util.override_defaults(symbols:at(k):unwrap(), v:unwrap())
+ return
+ end
end
end
-- Now check what Rspamd knows about this symbol
local sym = rspamd_config:get_symbol(k)
- if not sym or not sym.group then
+ if groups and (not sym or not sym.group) then
-- Otherwise we just use group 'ungrouped'
if not groups:at('ungrouped') then
groups.ungrouped = {
@@ -374,7 +376,7 @@ return function(cfg)
local next_act = actions_order[j]
if actions:at(next_act) and actions:at(next_act):type() == 'number' then
local next_score = actions:at(next_act):unwrap()
- if next_score <= score then
+ if type(score) == 'number' and type(next_score) == 'number' and next_score <= score then
logger.errx(rspamd_config, 'invalid actions thresholds order: action %s (%s) must have lower ' ..
'score than action %s (%s)', act, score, next_act, next_score)
ret = false
diff --git a/lualib/lua_dkim_tools.lua b/lualib/lua_dkim_tools.lua
index b7f520fae..69c9462b5 100644
--- a/lualib/lua_dkim_tools.lua
+++ b/lualib/lua_dkim_tools.lua
@@ -13,7 +13,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-]]--
+]] --
local exports = {}
@@ -33,7 +33,7 @@ local function check_violation(N, task, domain)
if task:has_symbol(sym_check) then
local sym = task:get_symbol(sym_check)[1]
logger.infox(task, 'skip signing for %s: violation %s found: %s',
- domain, sym_check, sym.options)
+ domain, sym_check, sym.options)
return false
end
@@ -92,7 +92,6 @@ local function parse_dkim_http_headers(N, task, settings)
local key = task:get_request_header(headers.key_header)
if not (domain and selector and key) then
-
logger.errx(task, 'missing required headers to sign email')
return false, {}
end
@@ -258,14 +257,14 @@ local function prepare_dkim_signing(N, task, settings)
-- OpenDKIM style
if is_skip_sign() then
lua_util.debugm(N, task,
- 'skip signing: is_sign_network: %s, is_authed: %s, is_local: %s',
- is_sign_networks, is_authed, is_local)
+ 'skip signing: is_sign_network: %s, is_authed: %s, is_local: %s',
+ is_sign_networks, is_authed, is_local)
return false, {}
end
if not hfrom or not hfrom[1] or not hfrom[1].addr then
lua_util.debugm(N, task,
- 'signing_table: cannot get data when no header from is presented')
+ 'signing_table: cannot get data when no header from is presented')
return false, {}
end
local sign_entry = settings.signing_table:get_key(hfrom[1].addr:lower())
@@ -273,7 +272,7 @@ local function prepare_dkim_signing(N, task, settings)
if sign_entry then
-- Check opendkim style entries
lua_util.debugm(N, task,
- 'signing_table: found entry for %s: %s', hfrom[1].addr, sign_entry)
+ 'signing_table: found entry for %s: %s', hfrom[1].addr, sign_entry)
if sign_entry == '%' then
sign_entry = hdom
end
@@ -291,7 +290,7 @@ local function prepare_dkim_signing(N, task, settings)
if not selector then
logger.errx(task, 'no selector defined for sign_entry %s, key_entry %s',
- sign_entry, key_entry)
+ sign_entry, key_entry)
return false, {}
end
@@ -305,11 +304,11 @@ local function prepare_dkim_signing(N, task, settings)
if st:sub(1, 1) == '/' or st == './' or st == '..' then
res.key = parts[2]:gsub('%%', hdom)
lua_util.debugm(N, task, 'perform dkim signing for %s, selector=%s, domain=%s, key file=%s',
- hdom, selector, res.domain, res.key)
+ hdom, selector, res.domain, res.key)
else
res.rawkey = parts[2] -- No sanity check here
lua_util.debugm(N, task, 'perform dkim signing for %s, selector=%s, domain=%s, raw key used',
- hdom, selector, res.domain)
+ hdom, selector, res.domain)
end
return true, { res }
@@ -327,56 +326,56 @@ local function prepare_dkim_signing(N, task, settings)
if st:sub(1, 1) == '/' or st == './' or st == '..' then
res.key = parts[3]:gsub('%%', hdom)
lua_util.debugm(N, task, 'perform dkim signing for %s, selector=%s, domain=%s, key file=%s',
- hdom, selector, res.domain, res.key)
+ hdom, selector, res.domain, res.key)
else
res.rawkey = parts[3] -- No sanity check here
lua_util.debugm(N, task, 'perform dkim signing for %s, selector=%s, domain=%s, raw key used',
- hdom, selector, res.domain)
+ hdom, selector, res.domain)
end
return true, { res }
else
logger.errx(task, 'invalid key entry for sign entry %s: %s; when signing %s domain',
- sign_entry, key_entry, hdom)
+ sign_entry, key_entry, hdom)
return false, {}
end
elseif settings.use_vault then
-- Sign table is presented, the rest is covered by vault
lua_util.debugm(N, task, 'check vault for %s, by sign entry %s, key entry is missing',
- hdom, sign_entry)
+ hdom, sign_entry)
return true, {
domain = sign_entry,
vault = true
}
else
logger.errx(task, 'missing key entry for sign entry %s; when signing %s domain',
- sign_entry, hdom)
+ sign_entry, hdom)
return false, {}
end
else
logger.errx(task, 'cannot get key entry for signing entry %s, when signing %s domain',
- sign_entry, hdom)
+ sign_entry, hdom)
return false, {}
end
else
lua_util.debugm(N, task,
- 'signing_table: no entry for %s', hfrom[1].addr)
+ 'signing_table: no entry for %s', hfrom[1].addr)
return false, {}
end
else
if settings.use_domain_sign_networks and is_sign_networks then
dkim_domain = get_dkim_domain('use_domain_sign_networks')
lua_util.debugm(N, task,
- 'sign_networks: use domain(%s) for signature: %s',
- settings.use_domain_sign_networks, dkim_domain)
+ 'sign_networks: use domain(%s) for signature: %s',
+ settings.use_domain_sign_networks, dkim_domain)
elseif settings.use_domain_sign_local and is_local then
dkim_domain = get_dkim_domain('use_domain_sign_local')
lua_util.debugm(N, task, 'local: use domain(%s) for signature: %s',
- settings.use_domain_sign_local, dkim_domain)
+ settings.use_domain_sign_local, dkim_domain)
elseif settings.use_domain_sign_inbound and not is_local and not auser then
dkim_domain = get_dkim_domain('use_domain_sign_inbound')
lua_util.debugm(N, task, 'inbound: use domain(%s) for signature: %s',
- settings.use_domain_sign_inbound, dkim_domain)
+ settings.use_domain_sign_inbound, dkim_domain)
elseif settings.use_domain_custom then
if type(settings.use_domain_custom) == 'string' then
-- Load custom function
@@ -387,10 +386,10 @@ local function prepare_dkim_signing(N, task, settings)
settings.use_domain_custom = res_or_err
dkim_domain = settings.use_domain_custom(task)
lua_util.debugm(N, task, 'use custom domain for signing: %s',
- dkim_domain)
+ dkim_domain)
else
logger.errx(task, 'cannot load dkim domain custom script: invalid type: %s, expected function',
- type(res_or_err))
+ type(res_or_err))
settings.use_domain_custom = nil
end
else
@@ -400,12 +399,12 @@ local function prepare_dkim_signing(N, task, settings)
else
dkim_domain = settings.use_domain_custom(task)
lua_util.debugm(N, task, 'use custom domain for signing: %s',
- dkim_domain)
+ dkim_domain)
end
else
dkim_domain = get_dkim_domain('use_domain')
lua_util.debugm(N, task, 'use domain(%s) for signature: %s',
- settings.use_domain, dkim_domain)
+ settings.use_domain, dkim_domain)
end
end
@@ -467,7 +466,7 @@ local function prepare_dkim_signing(N, task, settings)
})
else
lua_util.debugm(N, task, 'domain %s is not designated for vault',
- dkim_domain)
+ dkim_domain)
end
else
-- TODO: try every domain in the vault
@@ -501,7 +500,7 @@ local function prepare_dkim_signing(N, task, settings)
if ret then
table.insert(p, k)
lua_util.debugm(N, task, 'using mempool selector %s with key %s',
- k.selector, k.key)
+ k.selector, k.key)
end
end
@@ -530,11 +529,11 @@ local function prepare_dkim_signing(N, task, settings)
if not settings.use_redis then
insert_or_update_prop(N, task, p, 'key',
- 'default path', settings.path)
+ 'default path', settings.path)
end
insert_or_update_prop(N, task, p, 'selector',
- 'default selector', settings.selector)
+ 'default selector', settings.selector)
if settings.check_violation then
if not check_violation(N, task, p.domain) then
@@ -543,7 +542,7 @@ local function prepare_dkim_signing(N, task, settings)
end
insert_or_update_prop(N, task, p, 'domain', 'dkim_domain',
- dkim_domain)
+ dkim_domain)
return #p > 0 and true or false, p
end
@@ -560,53 +559,53 @@ exports.sign_using_redis = function(N, task, settings, selectors, sign_func, err
local function redis_key_cb(err, data)
if err then
err_func(string.format("cannot make request to load DKIM key for %s: %s",
- rk, err))
+ rk, err))
elseif type(data) ~= 'string' then
lua_util.debugm(N, task, "missing DKIM key for %s", rk)
else
p.rawkey = data
lua_util.debugm(N, task, 'found and parsed key for %s:%s in Redis',
- p.domain, p.selector)
+ p.domain, p.selector)
sign_func(task, p)
end
end
local rret = lua_redis.redis_make_request(task,
- settings.redis_params, -- connect params
- rk, -- hash key
- false, -- is write
- redis_key_cb, --callback
- 'HGET', -- command
- { settings.key_prefix, rk } -- arguments
+ settings.redis_params, -- connect params
+ rk, -- hash key
+ false, -- is write
+ redis_key_cb, --callback
+ 'HGET', -- command
+ { settings.key_prefix, rk } -- arguments
)
if not rret then
err_func(task,
- string.format("cannot make request to load DKIM key for %s", rk))
+ string.format("cannot make request to load DKIM key for %s", rk))
end
end
for _, p in ipairs(selectors) do
if settings.selector_prefix then
logger.infox(task, "using selector prefix '%s' for domain '%s'",
- settings.selector_prefix, p.domain);
+ settings.selector_prefix, p.domain);
local function redis_selector_cb(err, data)
if err or type(data) ~= 'string' then
err_func(task, string.format("cannot make request to load DKIM selector for domain %s: %s",
- p.domain, err))
+ p.domain, err))
else
try_redis_key(data, p)
end
end
local rret = lua_redis.redis_make_request(task,
- settings.redis_params, -- connect params
- p.domain, -- hash key
- false, -- is write
- redis_selector_cb, --callback
- 'HGET', -- command
- { settings.selector_prefix, p.domain } -- arguments
+ settings.redis_params, -- connect params
+ p.domain, -- hash key
+ false, -- is write
+ redis_selector_cb, --callback
+ 'HGET', -- command
+ { settings.selector_prefix, p.domain } -- arguments
)
if not rret then
err_func(task, string.format("cannot make Redis request to load DKIM selector for domain %s",
- p.domain))
+ p.domain))
end
else
try_redis_key(p.selector, p)
@@ -619,25 +618,25 @@ exports.sign_using_vault = function(N, task, settings, selector, sign_func, err_
local ucl = require "ucl"
local full_url = string.format('%s/v1/%s/%s',
- settings.vault_url, settings.vault_path or 'dkim', selector.domain)
+ settings.vault_url, settings.vault_path or 'dkim', selector.domain)
local upstream_list = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), settings.vault_url)
local function vault_callback(err, code, body, _)
if code ~= 200 then
err_func(task, string.format('cannot request data from the vault url: %s; %s (%s)',
- full_url, err, body))
+ full_url, err, body))
else
local parser = ucl.parser()
local res, parser_err = parser:parse_string(body)
if not res then
err_func(task, string.format('vault reply for %s (data=%s) cannot be parsed: %s',
- full_url, body, parser_err))
+ full_url, body, parser_err))
else
local obj = parser:get_object()
if not obj or not obj.data then
err_func(task, string.format('vault reply for %s (data=%s) is invalid, no data',
- full_url, body))
+ full_url, body))
else
local elts = obj.data.selectors or {}
local errs = {}
@@ -675,13 +674,13 @@ exports.sign_using_vault = function(N, task, settings, selector, sign_func, err_
alg = p.alg,
}
lua_util.debugm(N, task, 'found and parsed key for %s:%s in Vault',
- dkim_sign_data.domain, dkim_sign_data.selector)
+ dkim_sign_data.domain, dkim_sign_data.selector)
nvalid = nvalid + 1
sign_func(task, dkim_sign_data)
end, fun.filter(is_selector_valid, elts))
for _, e in errs do
lua_util.debugm(N, task, 'error found during processing Vault selectors: %s:%s',
- e[1], e[2])
+ e[1], e[2])
end
if nvalid == 0 then
@@ -707,7 +706,7 @@ exports.sign_using_vault = function(N, task, settings, selector, sign_func, err_
if not ret then
err_func(task, string.format("cannot make HTTP request to load DKIM data domain %s",
- selector.domain))
+ selector.domain))
end
end
@@ -732,8 +731,7 @@ exports.process_signing_settings = function(N, settings, opts)
selector_map = { 'map', 'DKIM selectors' },
signing_table = { 'glob', 'DKIM signing table' },
key_table = { 'glob', 'DKIM keys table' },
- vault_domains = { 'glob', 'DKIM signing domains in vault' },
- whitelisted_signers_map = { 'set', 'ARC trusted signers domains' }
+ vault_domains = { 'glob', 'DKIM signing domains in vault' }
}
for k, v in pairs(opts) do
local maybe_map = maps_opts[k]
diff --git a/lualib/lua_magic/patterns.lua b/lualib/lua_magic/patterns.lua
index 971ddd95f..4a5abd8ce 100644
--- a/lualib/lua_magic/patterns.lua
+++ b/lualib/lua_magic/patterns.lua
@@ -466,6 +466,23 @@ local patterns = {
},
}
},
+ heic = {
+ matches = {
+ {
+ -- HEIC major brands following the ISO BMFF `ftyp` box tag
+ -- Covers the HEVC still image brands: heic, heix, heim, heis
+ string = "^....ftyphei[cmsx]",
+ position = 12,
+ weight = 60,
+ },
+ {
+ -- Alternative signature for HEIC/HEIF
+ string = [[^....ftypmif1]],
+ position = 12,
+ weight = 60,
+ },
+ }
+ },
}
return patterns
diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index 3dce2e1f8..ad4ae4349 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -279,6 +279,11 @@ local types = {
ct = 'image/bmp',
av_check = false,
},
+ heic = {
+ type = 'image',
+ ct = 'image/heic',
+ av_check = false,
+ },
dwg = {
type = 'image',
ct = 'image/vnd.dwg',
@@ -324,4 +329,4 @@ local types = {
},
}
-return types \ No newline at end of file
+return types
diff --git a/lualib/lua_maps.lua b/lualib/lua_maps.lua
index 2699ea214..c45b51b97 100644
--- a/lualib/lua_maps.lua
+++ b/lualib/lua_maps.lua
@@ -88,16 +88,64 @@ end
local external_map_schema = ts.shape {
external = ts.equivalent(true), -- must be true
- backend = ts.string, -- where to get data, required
- method = ts.one_of { "body", "header", "query" }, -- how to pass input
+ backend = ts.string:is_optional(), -- where to get data, required for HTTP
+ cdb = ts.string:is_optional(), -- path to CDB file, required for CDB
+ method = ts.one_of { "body", "header", "query" }:is_optional(), -- how to pass input
encode = ts.one_of { "json", "messagepack" }:is_optional(), -- how to encode input (if relevant)
timeout = (ts.number + ts.string / lua_util.parse_time_interval):is_optional(),
}
+-- Storage for CDB instances
+local cdb_maps = {}
+local cdb_finisher_set = false
+
local rspamd_http = require "rspamd_http"
local ucl = require "ucl"
+-- Looks up `key` in the CDB file `map_config.cdb`; passes the outcome to `callback` if given, else returns the found value
+local function handle_cdb_map(map_config, key, callback, task)
+ local rspamd_cdb = require "rspamd_cdb"
+ local hash_key = map_config.cdb
+
+ -- Open the CDB file on first use only; the handle is cached in cdb_maps under its path
+ if not cdb_maps[hash_key] then
+ local cdb_file = map_config.cdb
+ -- Provide ev_base to monitor changes
+ local cdb_handle = rspamd_cdb.open(cdb_file, task:get_ev_base())
+
+ if not cdb_handle then
+ local err_msg = string.format("Failed to open CDB file: %s", cdb_file)
+ rspamd_logger.errx(task, err_msg)
+ if callback then
+ callback(false, err_msg, 500, task)
+ end
+ return nil
+ else
+ cdb_maps[hash_key] = cdb_handle
+ end
+ end
+
+ -- Look up the key in the cached CDB handle
+ local result = cdb_maps[hash_key]:find(key)
+
+ if callback then
+ if result then
+ callback(true, result, 200, task)
+ else
+ callback(false, 'not found', 404, task)
+ end
+ return nil
+ end
+
+ return result
+end
+
local function query_external_map(map_config, upstreams, key, callback, task)
+ -- Check if this is a CDB map
+ if map_config.cdb then
+ return handle_cdb_map(map_config, key, callback, task)
+ end
+ -- Fallback to HTTP
local http_method = (map_config.method == 'body' or map_config.method == 'form') and 'POST' or 'GET'
local upstream = upstreams:get_upstream_round_robin()
local http_headers = {
@@ -138,7 +186,8 @@ local function query_external_map(map_config, upstreams, key, callback, task)
local params_table = {}
for k, v in pairs(key) do
if type(v) == 'string' then
- table.insert(params_table, string.format('%s=%s', lua_util.url_encode_string(k), lua_util.url_encode_string(v)))
+ table.insert(params_table,
+ string.format('%s=%s', lua_util.url_encode_string(k), lua_util.url_encode_string(v)))
end
end
url = string.format('%s?%s', url, table.concat(params_table, '&'))
@@ -305,7 +354,7 @@ local function rspamd_map_add_from_ucl(opt, mtype, description, callback)
if string.find(opt[1], '^%d') then
-- List of numeric stuff (hope it's ipnets definitions)
- local map = rspamd_config:radix_from_ucl(opt)
+ local map = rspamd_config:radix_from_ucl(opt, description)
if map then
ret.__data = map
@@ -448,17 +497,39 @@ local function rspamd_map_add_from_ucl(opt, mtype, description, callback)
local parse_res, parse_err = external_map_schema(opt)
if parse_res then
- ret.__upstreams = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), opt.backend)
- if ret.__upstreams then
+ if opt.cdb then
ret.__data = opt
ret.__external = true
setmetatable(ret, ret_mt)
maybe_register_selector()
+ if not cdb_finisher_set then
+ -- Register a finalize script to close all CDB handles when Rspamd stops
+ rspamd_config:register_finish_script(function()
+ for path, _ in pairs(cdb_maps) do
+ rspamd_logger.infox(rspamd_config, 'closing CDB map: %s', path)
+ cdb_maps[path] = nil
+ end
+ end)
+ cdb_finisher_set = true
+ end
+
return ret
+ elseif opt.backend then
+ ret.__upstreams = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), opt.backend)
+ if ret.__upstreams then
+ ret.__data = opt
+ ret.__external = true
+ setmetatable(ret, ret_mt)
+ maybe_register_selector()
+
+ return ret
+ else
+ rspamd_logger.errx(rspamd_config, 'cannot parse external map upstreams: %s',
+ opt.backend)
+ end
else
- rspamd_logger.errx(rspamd_config, 'cannot parse external map upstreams: %s',
- opt.backend)
+ rspamd_logger.errx(rspamd_config, 'external map requires either "cdb" or "backend" parameter')
end
else
rspamd_logger.errx(rspamd_config, 'cannot parse external map: %s',
@@ -526,15 +597,12 @@ local function rspamd_maybe_check_map(key, what)
return rspamd_maybe_check_map(key, elt)
end, what)
end
- if type(rspamd_maps) == "table" then
- local mn
- if starts(key, "map:") then
- mn = string.sub(key, 5)
- elseif starts(key, "map://") then
- mn = string.sub(key, 7)
+ if type(rspamd_maps) == "table" and starts(key, "map:") then
+ local mn = string.sub(key, 5)
+ if starts(mn, "//") then
+ mn = string.sub(mn, 3)
end
-
- if mn and rspamd_maps[mn] then
+ if rspamd_maps[mn] then
return rspamd_maps[mn]:get_key(what)
end
end
diff --git a/lualib/lua_maps_expressions.lua b/lualib/lua_maps_expressions.lua
index 996de99c0..2ad9ad1d8 100644
--- a/lualib/lua_maps_expressions.lua
+++ b/lualib/lua_maps_expressions.lua
@@ -155,7 +155,7 @@ local function create(cfg, obj, module_name)
end
end
local map = lua_maps.map_add_from_ucl(rule.map, rule.type,
- obj.description or module_name)
+ rule.description or obj.description or module_name)
if not map then
rspamd_logger.errx(cfg, 'cannot add map for element %s in module %s',
name, module_name)
diff --git a/lualib/lua_mime.lua b/lualib/lua_mime.lua
index fe221f599..c85f35066 100644
--- a/lualib/lua_mime.lua
+++ b/lualib/lua_mime.lua
@@ -158,13 +158,21 @@ exports.add_text_footer = function(task, html_footer, text_footer)
local cur_boundary
for _, part in ipairs(task:get_parts()) do
local boundary = part:get_boundary()
+ local part_ct = part:get_header('Content-Type')
+ if part_ct then
+ part_ct = rspamd_util.parse_content_type(part_ct, task:get_mempool())
+ end
if part:is_multipart() then
if cur_boundary then
out[#out + 1] = string.format('--%s',
- boundaries[#boundaries])
+ boundaries[#boundaries].boundary)
end
- boundaries[#boundaries + 1] = boundary or '--XXX'
+ boundaries[#boundaries + 1] = {
+ boundary = boundary or '--XXX',
+ ct_type = part_ct and part_ct.type or '', -- part_ct is nil without a Content-Type header
+ ct_subtype = part_ct and part_ct.subtype or '',
+ }
cur_boundary = boundary
local rh = part:get_raw_headers()
@@ -176,7 +184,7 @@ exports.add_text_footer = function(task, html_footer, text_footer)
if cur_boundary and boundary ~= cur_boundary then
-- Need to close boundary
out[#out + 1] = string.format('--%s--%s',
- boundaries[#boundaries], newline_s)
+ boundaries[#boundaries].boundary, newline_s)
table.remove(boundaries)
cur_boundary = nil
end
@@ -218,7 +226,13 @@ exports.add_text_footer = function(task, html_footer, text_footer)
if cur_boundary and boundary ~= cur_boundary then
-- Need to close boundary
out[#out + 1] = string.format('--%s--%s',
- boundaries[#boundaries], newline_s)
+ boundaries[#boundaries].boundary, newline_s)
+ -- Also close the enclosing boundary if the part just closed is multipart/related
+ if #boundaries > 1 and boundaries[#boundaries].ct_type == "multipart" and boundaries[#boundaries].ct_subtype == "related" then
+ out[#out + 1] = string.format('--%s--%s',
+ boundaries[#boundaries -1].boundary, newline_s)
+ table.remove(boundaries)
+ end
table.remove(boundaries)
cur_boundary = boundary
end
@@ -239,7 +253,7 @@ exports.add_text_footer = function(task, html_footer, text_footer)
-- Close remaining
local b = table.remove(boundaries)
while b do
- out[#out + 1] = string.format('--%s--', b)
+ out[#out + 1] = string.format('--%s--', b.boundary)
if #boundaries > 0 then
out[#out + 1] = ''
end
diff --git a/lualib/lua_mime_types.lua b/lualib/lua_mime_types.lua
index ba55f9740..7b6688b3c 100644
--- a/lualib/lua_mime_types.lua
+++ b/lualib/lua_mime_types.lua
@@ -214,7 +214,7 @@ exports.full_extensions_map = {
{ "hxw", "application/octet-stream" },
{ "hxx", "text/plain" },
{ "i", "text/plain" },
- { "ico", "image/x-icon" },
+ { "ico", {"image/x-icon", "image/vnd.microsoft.icon"} },
{ "ics", { "text/calendar", "application/ics", "application/octet-stream" } },
{ "idl", "text/plain" },
{ "ief", "image/ief" },
diff --git a/lualib/lua_redis.lua b/lualib/lua_redis.lua
index 85f5ebc7a..195b7759f 100644
--- a/lualib/lua_redis.lua
+++ b/lualib/lua_redis.lua
@@ -26,7 +26,7 @@ local N = "lua_redis"
local db_schema = (ts.number / tostring + ts.string):is_optional():describe("Database number")
local common_schema = {
- timeout = (ts.number + ts.string / lutil.parse_time_interval):is_optional():describe("Connection timeout"),
+ timeout = (ts.number + ts.string / lutil.parse_time_interval):is_optional():describe("Connection timeout (seconds)"),
db = db_schema,
database = db_schema,
dbname = db_schema,
@@ -40,6 +40,7 @@ local common_schema = {
sentinel_master_maxerrors = (ts.number + ts.string / tonumber):is_optional():describe("Sentinel master max errors"),
sentinel_username = ts.string:is_optional():describe("Sentinel username"),
sentinel_password = ts.string:is_optional():describe("Sentinel password"),
+ redis_version = (ts.number + ts.string / tonumber):is_optional():describe("Redis server version (6 or 7)"),
}
local read_schema = lutil.table_merge({
@@ -357,6 +358,10 @@ local function process_redis_opts(options, redis_params)
redis_params['prefix'] = options['prefix']
end
+ if options['redis_version'] and not redis_params['redis_version'] then
+ redis_params['redis_version'] = tonumber(options['redis_version'])
+ end
+
if type(options['expand_keys']) == 'boolean' then
redis_params['expand_keys'] = options['expand_keys']
else
@@ -1124,9 +1129,9 @@ local function redis_make_request_taskless(ev_base, cfg, redis_params, key,
end
--[[[
--- @function lua_redis.redis_make_request_taskless(ev_base, redis_params, key, is_write, callback, command, args)
+-- @function lua_redis.redis_make_request_taskless(ev_base, cfg, redis_params, key, is_write, callback, command, args)
-- Sends a request to Redis in context where `task` is not available for some specific use-cases
--- Identical to redis_make_request() except in that first parameter is an `event base` object
+-- Identical to redis_make_request() except that the first parameter is an `event base` object and the second one is the `config` object
--]]
exports.rspamd_redis_make_request_taskless = redis_make_request_taskless
@@ -1202,15 +1207,13 @@ local function prepare_redis_call(script)
return options
end
-local function is_all_servers_ready(script)
+local function is_any_server_ready(script)
for _, s in ipairs(script.servers_ready) do
- if s == "unsent" or s == "tempfail" then
- return false
+ if s == "done" then
+ return true
end
end
-
- -- We assume that permanent errors are not recoverable, so we will just skip those servers
- return true
+ return false
end
local function is_all_servers_failed(script)
@@ -1264,7 +1267,7 @@ local function load_script_task(script, task, is_write)
script.sha = data -- We assume that sha is the same on all servers
script.servers_ready[idx] = "done"
end
- if is_all_servers_ready(script) then
+ if is_any_server_ready(script) then
script_set_loaded(script)
elseif is_all_servers_failed(script) then
script.pending_upload = false
@@ -1282,7 +1285,7 @@ local function load_script_task(script, task, is_write)
end
end
- if is_all_servers_ready(script) then
+ if is_any_server_ready(script) then
script_set_loaded(script)
elseif is_all_servers_failed(script) then
script.pending_upload = false
@@ -1309,7 +1312,6 @@ local function load_script_taskless(script, cfg, ev_base, is_write)
err, script.caller.short_src, script.caller.currentline)
opt.upstream:fail()
script.servers_ready[idx] = "failed"
- return
else
-- Assume temporary error
logger.infox(cfg, 'temporary error uploading script %s to %s: %s; registered from: %s:%s',
@@ -1317,7 +1319,6 @@ local function load_script_taskless(script, cfg, ev_base, is_write)
opt.upstream:get_addr():to_string(true),
err, script.caller.short_src, script.caller.currentline)
script.servers_ready[idx] = "tempfail"
- return
end
else
opt.upstream:ok()
@@ -1330,7 +1331,7 @@ local function load_script_taskless(script, cfg, ev_base, is_write)
script.servers_ready[idx] = "done"
end
- if is_all_servers_ready(script) then
+ if is_any_server_ready(script) then
script_set_loaded(script)
elseif is_all_servers_failed(script) then
script.pending_upload = false
@@ -1348,7 +1349,7 @@ local function load_script_taskless(script, cfg, ev_base, is_write)
end
end
- if is_all_servers_ready(script) then
+ if is_any_server_ready(script) then
script_set_loaded(script)
elseif is_all_servers_failed(script) then
script.pending_upload = false
@@ -1477,6 +1478,10 @@ local function exec_redis_script(id, params, callback, keys, args)
script.sha = nil
script.loaded = nil
script.pending_upload = true
+ -- We must reset all servers to "unsent", as we don't know here which one has failed
+ for i, _ in ipairs(script.servers_ready) do
+ script.servers_ready[i] = "unsent"
+ end
-- Reload scripts if this has not been initiated yet
if params.task then
load_script_task(script, params.task)
@@ -1510,15 +1515,20 @@ local function exec_redis_script(id, params, callback, keys, args)
end
end
+ local redis_command = 'EVALSHA'
+ if not params.is_write and script.redis_params.redis_version and
+ script.redis_params.redis_version >= 7 then
+ redis_command = 'EVALSHA_RO'
+ end
if params.task then
if not rspamd_redis_make_request(params.task, script.redis_params,
- params.key, params.is_write, redis_cb, 'EVALSHA', redis_args) then
+ params.key, params.is_write, redis_cb, redis_command, redis_args) then
callback('Cannot make redis request', nil)
end
else
if not redis_make_request_taskless(params.ev_base, rspamd_config,
script.redis_params,
- params.key, params.is_write, redis_cb, 'EVALSHA', redis_args) then
+ params.key, params.is_write, redis_cb, redis_command, redis_args) then
callback('Cannot make redis request', nil)
end
end
diff --git a/lualib/lua_scanners/cloudmark.lua b/lualib/lua_scanners/cloudmark.lua
index 26a3bf9c4..12a60abf1 100644
--- a/lualib/lua_scanners/cloudmark.lua
+++ b/lualib/lua_scanners/cloudmark.lua
@@ -173,53 +173,6 @@ local function cloudmark_config(opts)
return nil
end
--- Converts a key-value map to the table representing multipart body, with the following values:
--- `data`: data of the part
--- `filename`: optional filename
--- `content-type`: content type of the element (optional)
--- `content-transfer-encoding`: optional CTE header
-local function table_to_multipart_body(tbl, boundary)
- local seen_data = false
- local out = {}
-
- for k, v in pairs(tbl) do
- if v.data then
- seen_data = true
- table.insert(out, string.format('--%s\r\n', boundary))
- if v.filename then
- table.insert(out,
- string.format('Content-Disposition: form-data; name="%s"; filename="%s"\r\n',
- k, v.filename))
- else
- table.insert(out,
- string.format('Content-Disposition: form-data; name="%s"\r\n', k))
- end
- if v['content-type'] then
- table.insert(out,
- string.format('Content-Type: %s\r\n', v['content-type']))
- else
- table.insert(out, 'Content-Type: text/plain\r\n')
- end
- if v['content-transfer-encoding'] then
- table.insert(out,
- string.format('Content-Transfer-Encoding: %s\r\n',
- v['content-transfer-encoding']))
- else
- table.insert(out, 'Content-Transfer-Encoding: binary\r\n')
- end
- table.insert(out, '\r\n')
- table.insert(out, v.data)
- table.insert(out, '\r\n')
- end
- end
-
- if seen_data then
- table.insert(out, string.format('--%s--\r\n', boundary))
- end
-
- return out
-end
-
local function get_specific_symbol(scores_symbols, score)
local selected
local sel_thr = -1
@@ -359,7 +312,7 @@ local function cloudmark_check(task, content, digest, rule, maybe_part)
local request_data = {
task = task,
url = url,
- body = table_to_multipart_body(request, static_boundary),
+ body = lua_util.table_to_multipart_body(request, static_boundary),
headers = {
['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary)
},
diff --git a/lualib/lua_scanners/icap.lua b/lualib/lua_scanners/icap.lua
index 2e3ced034..532858793 100644
--- a/lualib/lua_scanners/icap.lua
+++ b/lualib/lua_scanners/icap.lua
@@ -239,13 +239,16 @@ local function icap_check(task, content, digest, rule, maybe_part)
end
end
- local function get_req_headers()
-
+ local function get_req_headers()
local in_client_ip = task:get_from_ip()
+ local in_client_ip_str = in_client_ip:to_string()
local req_hlen = 2
+ if in_client_ip:get_version() == 6 then
+ in_client_ip_str = "ip6-" .. string.gsub(in_client_ip_str, ":", "-")
+ end
if maybe_part then
table.insert(req_headers,
- string.format('GET http://%s/%s HTTP/1.0\r\n', in_client_ip, lua_util.url_encode_string(maybe_part:get_filename())))
+ string.format('GET http://%s/%s HTTP/1.0\r\n', in_client_ip_str, lua_util.url_encode_string(maybe_part:get_filename())))
if rule.use_specific_content_type then
table.insert(http_headers, string.format('Content-Type: %s/%s\r\n', maybe_part:get_detected_type()))
--else
diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua
index 62b38c87e..636212b1f 100644
--- a/lualib/lua_util.lua
+++ b/lualib/lua_util.lua
@@ -1805,4 +1805,55 @@ exports.symbols_priorities = {
low = 0,
}
+---[[[
+-- @function lua_util.table_to_multipart_body(tbl, boundary)
+-- Converts a key-value map (form field name -> part table) to an array of chunks forming a multipart body; part fields:
+-- `data`: data of the part (entries without it are skipped)
+-- `filename`: optional filename
+-- `content-type`: content type of the element (optional, defaults to text/plain)
+-- `content-transfer-encoding`: optional CTE header (defaults to binary)
+local function table_to_multipart_body(tbl, boundary)
+ local seen_data = false
+ local out = {}
+
+ for k, v in pairs(tbl) do
+ if v.data then
+ seen_data = true
+ table.insert(out, string.format('--%s\r\n', boundary))
+ if v.filename then
+ table.insert(out,
+ string.format('Content-Disposition: form-data; name="%s"; filename="%s"\r\n',
+ k, v.filename))
+ else
+ table.insert(out,
+ string.format('Content-Disposition: form-data; name="%s"\r\n', k))
+ end
+ if v['content-type'] then
+ table.insert(out,
+ string.format('Content-Type: %s\r\n', v['content-type']))
+ else
+ table.insert(out, 'Content-Type: text/plain\r\n')
+ end
+ if v['content-transfer-encoding'] then
+ table.insert(out,
+ string.format('Content-Transfer-Encoding: %s\r\n',
+ v['content-transfer-encoding']))
+ else
+ table.insert(out, 'Content-Transfer-Encoding: binary\r\n')
+ end
+ table.insert(out, '\r\n')
+ table.insert(out, v.data)
+ table.insert(out, '\r\n')
+ end
+ end
+
+ if seen_data then
+ table.insert(out, string.format('--%s--\r\n', boundary))
+ end
+
+ return out
+end
+
+exports.table_to_multipart_body = table_to_multipart_body
+
return exports
diff --git a/lualib/plugins/neural.lua b/lualib/plugins/neural.lua
index 6e88ef21c..545214669 100644
--- a/lualib/plugins/neural.lua
+++ b/lualib/plugins/neural.lua
@@ -757,7 +757,7 @@ local function process_rules_settings()
type = 'set',
})
lua_redis.register_prefix(selt.prefix .. '_\\d+_ham_set', N,
- string.format('NN learning set (spam) for rule "%s"; settings id "%s"',
+ string.format('NN learning set (ham) for rule "%s"; settings id "%s"',
rule.prefix, selt.name), {
persistent = true,
type = 'set',
diff --git a/lualib/plugins/rbl.lua b/lualib/plugins/rbl.lua
index af5d6bd91..074fc7f0c 100644
--- a/lualib/plugins/rbl.lua
+++ b/lualib/plugins/rbl.lua
@@ -32,6 +32,7 @@ local check_types = {
content_urls = {},
numeric_urls = {},
emails = {},
+ images = {},
replyto = {},
dkim = {},
rdns = {
@@ -165,8 +166,6 @@ local function convert_checks(rule, name)
end
end
- rule[check] = check_type
-
if not check_type.connfilter then
all_connfilter = false
end
@@ -176,6 +175,8 @@ local function convert_checks(rule, name)
name, check)
return nil
end
+
+ rule[check] = true
else
rspamd_logger.infox(rspamd_config, 'disable check %s in %s: excluded explicitly',
check, name)
diff --git a/lualib/redis_scripts/bayes_cache_learn.lua b/lualib/redis_scripts/bayes_cache_learn.lua
index d8a2d878e..a7c9ac443 100644
--- a/lualib/redis_scripts/bayes_cache_learn.lua
+++ b/lualib/redis_scripts/bayes_cache_learn.lua
@@ -1,12 +1,15 @@
--- Lua script to perform cache checking for bayes classification
+-- Lua script to perform cache checking for bayes classification (multi-class)
-- This script accepts the following parameters:
-- key1 - cache id
--- key3 - is spam (1 or 0)
+-- key2 - class_id (numeric hash of class name, computed by C side)
-- key3 - configuration table in message pack
local cache_id = KEYS[1]
-local is_spam = KEYS[2]
+local class_id = KEYS[2]
local conf = cmsgpack.unpack(KEYS[3])
+
+-- Use class_id directly as cache value
+local cache_value = tostring(class_id)
cache_id = string.sub(cache_id, 1, conf.cache_elt_len)
-- Try each prefix that is in Redis (as some other instance might have set it)
@@ -15,8 +18,8 @@ for i = 0, conf.cache_max_keys do
local have = redis.call('HGET', prefix, cache_id)
if have then
- -- Already in cache, but is_spam changes when relearning
- redis.call('HSET', prefix, cache_id, is_spam)
+ -- Already in cache, but cache_value changes when relearning
+ redis.call('HSET', prefix, cache_id, cache_value)
return false
end
end
@@ -30,7 +33,7 @@ for i = 0, conf.cache_max_keys do
if count < lim then
-- We can add it to this prefix
- redis.call('HSET', prefix, cache_id, is_spam)
+ redis.call('HSET', prefix, cache_id, cache_value)
added = true
end
end
@@ -46,7 +49,7 @@ if not added then
if exists then
if not expired then
redis.call('DEL', prefix)
- redis.call('HSET', prefix, cache_id, is_spam)
+ redis.call('HSET', prefix, cache_id, cache_value)
-- Do not expire anything else
expired = true
diff --git a/lualib/redis_scripts/bayes_classify.lua b/lualib/redis_scripts/bayes_classify.lua
index e94f645fd..d6132e631 100644
--- a/lualib/redis_scripts/bayes_classify.lua
+++ b/lualib/redis_scripts/bayes_classify.lua
@@ -1,37 +1,68 @@
--- Lua script to perform bayes classification
+-- Lua script to perform bayes classification (multi-class)
-- This script accepts the following parameters:
-- key1 - prefix for bayes tokens (e.g. for per-user classification)
--- key2 - set of tokens encoded in messagepack array of strings
+-- key2 - class labels: table of all class labels as "TABLE:label1,label2,..."
+-- key3 - set of tokens encoded in messagepack array of strings
local prefix = KEYS[1]
-local output_spam = {}
-local output_ham = {}
+local class_labels_arg = KEYS[2]
+local input_tokens = cmsgpack.unpack(KEYS[3])
-local learned_ham = tonumber(redis.call('HGET', prefix, 'learns_ham')) or 0
-local learned_spam = tonumber(redis.call('HGET', prefix, 'learns_spam')) or 0
+-- Parse class labels: normally "TABLE:label1,label2,...", anything else is taken as one legacy label
+local class_labels = {}
+if string.match(class_labels_arg, "^TABLE:") then
+ local labels_str = string.sub(class_labels_arg, 7) -- Remove "TABLE:" prefix
+ for label in string.gmatch(labels_str, "([^,]+)") do
+ table.insert(class_labels, label)
+ end
+else
+ -- Legacy single class - convert to array
+ class_labels = { class_labels_arg }
+end
--- Output is a set of pairs (token_index, token_count), tokens that are not
--- found are not filled.
--- This optimisation will save a lot of space for sparse tokens, and in Bayes that assumption is normally held
+-- Get learned counts for all classes (ordered)
+local learned_counts = {}
+for _, label in ipairs(class_labels) do
+ local key = 'learns_' .. string.lower(label)
+ -- Handle legacy keys for backward compatibility
+ if label == 'H' then
+ key = 'learns_ham'
+ elseif label == 'S' then
+ key = 'learns_spam'
+ end
+ table.insert(learned_counts, tonumber(redis.call('HGET', prefix, key)) or 0)
+end
-if learned_ham > 0 and learned_spam > 0 then
- local input_tokens = cmsgpack.unpack(KEYS[2])
- for i, token in ipairs(input_tokens) do
- local token_data = redis.call('HMGET', token, 'H', 'S')
+-- Get token data for all classes (ordered)
+local token_results = {}
+for i, _ in ipairs(class_labels) do
+ token_results[i] = {}
+end
- if token_data then
- local ham_count = token_data[1]
- local spam_count = token_data[2]
+-- Check if we have any learning data
+local has_learns = false
+for _, count in ipairs(learned_counts) do
+ if count > 0 then
+ has_learns = true
+ break
+ end
+end
- if ham_count then
- table.insert(output_ham, { i, tonumber(ham_count) })
- end
+if has_learns then
+ -- Process each token
+ for i, token in ipairs(input_tokens) do
+ local token_data = redis.call('HMGET', token, unpack(class_labels))
- if spam_count then
- table.insert(output_spam, { i, tonumber(spam_count) })
+ if token_data then
+ for j, _ in ipairs(class_labels) do
+ local count = token_data[j]
+ if count and tonumber(count) > 0 then
+ table.insert(token_results[j], { i, tonumber(count) })
+ end
end
end
end
end
-return { learned_ham, learned_spam, output_ham, output_spam } \ No newline at end of file
+-- Always return ordered arrays: [learned_counts_array, token_results_array]
+return { learned_counts, token_results }
diff --git a/lualib/redis_scripts/bayes_learn.lua b/lualib/redis_scripts/bayes_learn.lua
index 5456165b6..ebc798fe0 100644
--- a/lualib/redis_scripts/bayes_learn.lua
+++ b/lualib/redis_scripts/bayes_learn.lua
@@ -1,14 +1,14 @@
--- Lua script to perform bayes learning
+-- Lua script to perform bayes learning (multi-class)
-- This script accepts the following parameters:
-- key1 - prefix for bayes tokens (e.g. for per-user classification)
--- key2 - boolean is_spam
+-- key2 - class label string (e.g. "S", "H", "T")
-- key3 - string symbol
-- key4 - boolean is_unlearn
-- key5 - set of tokens encoded in messagepack array of strings
-- key6 - set of text tokens (if any) encoded in messagepack array of strings (size must be twice of `KEYS[5]`)
local prefix = KEYS[1]
-local is_spam = KEYS[2] == 'true' and true or false
+local class_label = KEYS[2]
local symbol = KEYS[3]
local is_unlearn = KEYS[4] == 'true' and true or false
local input_tokens = cmsgpack.unpack(KEYS[5])
@@ -18,15 +18,47 @@ if KEYS[6] then
text_tokens = cmsgpack.unpack(KEYS[6])
end
-local hash_key = is_spam and 'S' or 'H'
-local learned_key = is_spam and 'learns_spam' or 'learns_ham'
+-- Handle backward compatibility for boolean values
+if class_label == 'true' then
+ class_label = 'S' -- spam
+elseif class_label == 'false' then
+ class_label = 'H' -- ham
+end
+
+local hash_key = class_label
+local learned_key = 'learns_' .. string.lower(class_label)
+
+-- Handle legacy keys for backward compatibility
+if class_label == 'S' then
+ learned_key = 'learns_spam'
+elseif class_label == 'H' then
+ learned_key = 'learns_ham'
+end
redis.call('SADD', symbol .. '_keys', prefix)
redis.call('HSET', prefix, 'version', '2') -- new schema
-redis.call('HINCRBY', prefix, learned_key, is_unlearn and -1 or 1) -- increase or decrease learned count
+
+-- Update learned count, but prevent it from going negative
+if is_unlearn then
+ local current_count = tonumber(redis.call('HGET', prefix, learned_key)) or 0
+ if current_count > 0 then
+ redis.call('HINCRBY', prefix, learned_key, -1)
+ end
+else
+ redis.call('HINCRBY', prefix, learned_key, 1)
+end
for i, token in ipairs(input_tokens) do
- redis.call('HINCRBY', token, hash_key, is_unlearn and -1 or 1)
+ -- Update token count, but prevent it from going negative
+ if is_unlearn then
+ local current_token_count = tonumber(redis.call('HGET', token, hash_key)) or 0
+ if current_token_count > 0 then
+ redis.call('HINCRBY', token, hash_key, -1)
+ end
+ else
+ redis.call('HINCRBY', token, hash_key, 1)
+ end
+
if text_tokens then
local tok1 = text_tokens[i * 2 - 1]
local tok2 = text_tokens[i * 2]
@@ -38,7 +70,14 @@ for i, token in ipairs(input_tokens) do
redis.call('HSET', token, 'tokens', tok1)
end
- redis.call('ZINCRBY', prefix .. '_z', is_unlearn and -1 or 1, token)
+ if is_unlearn then
+ local current_z_score = tonumber(redis.call('ZSCORE', prefix .. '_z', token)) or 0
+ if current_z_score > 0 then
+ redis.call('ZINCRBY', prefix .. '_z', -1, token)
+ end
+ else
+ redis.call('ZINCRBY', prefix .. '_z', 1, token)
+ end
end
end
end
diff --git a/lualib/redis_scripts/neural_save_unlock.lua b/lualib/redis_scripts/neural_save_unlock.lua
index 5af1ddcde..7ea7dc2e5 100644
--- a/lualib/redis_scripts/neural_save_unlock.lua
+++ b/lualib/redis_scripts/neural_save_unlock.lua
@@ -12,13 +12,14 @@
local now = tonumber(KEYS[6])
redis.call('ZADD', KEYS[2], now, KEYS[4])
redis.call('HSET', KEYS[1], 'ann', KEYS[3])
-redis.call('DEL', KEYS[1] .. '_spam_set')
-redis.call('DEL', KEYS[1] .. '_ham_set')
-redis.call('HDEL', KEYS[1], 'lock')
-redis.call('HDEL', KEYS[7], 'lock')
-redis.call('EXPIRE', KEYS[1], tonumber(KEYS[5]))
redis.call('HSET', KEYS[1], 'roc_thresholds', KEYS[8])
if KEYS[9] then
redis.call('HSET', KEYS[1], 'pca', KEYS[9])
end
-return 1 \ No newline at end of file
+redis.call('HDEL', KEYS[1], 'lock')
+redis.call('HDEL', KEYS[7], 'lock')
+redis.call('EXPIRE', KEYS[1], tonumber(KEYS[5]))
+ -- Expire the spam/ham sets in 10 minutes to avoid a race with other rspamd replicas refilling deleted keys
+redis.call('EXPIRE', KEYS[7] .. '_spam_set', 600)
+redis.call('EXPIRE', KEYS[7] .. '_ham_set', 600)
+return 1
diff --git a/lualib/rspamadm/dmarc_report.lua b/lualib/rspamadm/dmarc_report.lua
index 71ff5d163..fb28a9264 100644
--- a/lualib/rspamadm/dmarc_report.lua
+++ b/lualib/rspamadm/dmarc_report.lua
@@ -99,6 +99,8 @@ local redis_attrs = {
log_obj = rspamd_config,
resolver = rspamadm_dns_resolver,
}
+local redis_attrs_write = lua_util.shallowcopy(redis_attrs)
+redis_attrs_write['is_write'] = true
local pool
local function load_config(opts)
@@ -481,7 +483,7 @@ local function prepare_report(opts, start_time, end_time, rep_key)
-- Rename report key to avoid races
if not opts.no_opt then
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'RENAME', rep_key, rep_key .. '_processing' })
rep_key = rep_key .. '_processing'
end
@@ -491,7 +493,7 @@ local function prepare_report(opts, start_time, end_time, rep_key)
if not dmarc_record then
if not opts.no_opt then
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'DEL', rep_key })
end
logger.messagex('Cannot process reports for domain %s; invalid dmarc record', reporting_domain)
@@ -554,7 +556,7 @@ local function prepare_report(opts, start_time, end_time, rep_key)
lua_util.debugm(N, 'got final message: %s', message)
if not opts.no_opt then
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'DEL', rep_key })
end
@@ -585,7 +587,7 @@ local function process_report_date(opts, start_time, end_time, date)
-- Rename index key to avoid races
if not opts.no_opt then
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'RENAME', idx_key, idx_key .. '_processing' })
idx_key = idx_key .. '_processing'
end
@@ -595,7 +597,7 @@ local function process_report_date(opts, start_time, end_time, date)
if not ret or not results then
-- Remove bad key
if not opts.no_opt then
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'DEL', idx_key })
end
logger.messagex('Cannot get reports for %s', date)
@@ -615,7 +617,7 @@ local function process_report_date(opts, start_time, end_time, date)
lua_util.shuffle(reports)
-- Remove processed key
if not opts.no_opt then
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'DEL', idx_key })
end
@@ -715,11 +717,11 @@ local function handler(args)
if not opts.no_opt then
lua_util.debugm(N, 'set last report date to %s', start_collection)
-- Hack to avoid coroutines + async functions mess: we use async redis call here
- redis_attrs.callback = function()
+ redis_attrs_write.callback = function()
logger.messagex('Reporting collection has finished %s dates processed, %s reports: %s completed, %s failed',
ndates, nreports, nsuccess, nfail)
end
- lua_redis.request(redis_params, redis_attrs,
+ lua_redis.request(redis_params, redis_attrs_write,
{ 'SETEX', 'rspamd_dmarc_last_collection', dmarc_settings.reporting.keys_expire * 2,
tostring(start_collection) })
else
diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua
index e0b23e16c..a20e47e23 100644
--- a/lualib/rspamadm/mime.lua
+++ b/lualib/rspamadm/mime.lua
@@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-]]--
+]] --
local argparse = require "argparse"
local ansicolors = require "ansicolors"
@@ -35,94 +35,94 @@ local parser = argparse()
:require_command(true)
parser:option "-c --config"
- :description "Path to config file"
- :argname("<cfg>")
- :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf")
+ :description "Path to config file"
+ :argname("<cfg>")
+ :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf")
parser:mutex(
- parser:flag "-j --json"
- :description "JSON output",
- parser:flag "-U --ucl"
- :description "UCL output",
- parser:flag "-M --messagepack"
- :description "MessagePack output"
+ parser:flag "-j --json"
+ :description "JSON output",
+ parser:flag "-U --ucl"
+ :description "UCL output",
+ parser:flag "-M --messagepack"
+ :description "MessagePack output"
)
parser:flag "-C --compact"
- :description "Use compact format"
+ :description "Use compact format"
parser:flag "--no-file"
- :description "Do not print filename"
+ :description "Do not print filename"
-- Extract subcommand
local extract = parser:command "extract ex e"
- :description "Extracts data from MIME messages"
+ :description "Extracts data from MIME messages"
extract:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
extract:flag "-t --text"
- :description "Extracts plain text data from a message"
+ :description "Extracts plain text data from a message"
extract:flag "-H --html"
- :description "Extracts htm data from a message"
+ :description "Extracts HTML data from a message"
extract:option "-o --output"
- :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')"
- :argname("<type>")
- :convert {
- raw = "raw",
- content = "content",
- oneline = "content_oneline",
- decoded = "raw_parsed",
- decoded_utf = "raw_utf"
-}
- :default "content"
+ :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')"
+ :argname("<type>")
+ :convert {
+ raw = "raw",
+ content = "content",
+ oneline = "content_oneline",
+ decoded = "raw_parsed",
+ decoded_utf = "raw_utf"
+ }
+ :default "content"
extract:flag "-w --words"
- :description "Extracts words"
+ :description "Extracts words"
extract:flag "-p --part"
- :description "Show part info"
+ :description "Show part info"
extract:flag "-s --structure"
- :description "Show structure info (e.g. HTML tags)"
+ :description "Show structure info (e.g. HTML tags)"
extract:flag "-i --invisible"
- :description "Show invisible content for HTML parts"
+ :description "Show invisible content for HTML parts"
extract:option "-F --words-format"
- :description "Words format ('stem', 'norm', 'raw', 'full')"
- :argname("<type>")
- :convert {
- stem = "stem",
- norm = "norm",
- raw = "raw",
- full = "full",
-}
- :default "stem"
+ :description "Words format ('stem', 'norm', 'raw', 'full')"
+ :argname("<type>")
+ :convert {
+ stem = "stem",
+ norm = "norm",
+ raw = "raw",
+ full = "full",
+ }
+ :default "stem"
local stat = parser:command "stat st s"
- :description "Extracts statistical data from MIME messages"
+ :description "Extracts statistical data from MIME messages"
stat:argument "file"
:description "File to process"
:argname "<file>"
:args "+"
stat:mutex(
- stat:flag "-m --meta"
- :description "Lua metatokens",
- stat:flag "-b --bayes"
- :description "Bayes tokens",
- stat:flag "-F --fuzzy"
- :description "Fuzzy hashes"
+ stat:flag "-m --meta"
+ :description "Lua metatokens",
+ stat:flag "-b --bayes"
+ :description "Bayes tokens",
+ stat:flag "-F --fuzzy"
+ :description "Fuzzy hashes"
)
stat:flag "-s --shingles"
:description "Show shingles for fuzzy hashes"
local urls = parser:command "urls url u"
- :description "Extracts URLs from MIME messages"
+ :description "Extracts URLs from MIME messages"
urls:argument "file"
:description "File to process"
:argname "<file>"
:args "+"
urls:mutex(
- urls:flag "-t --tld"
- :description "Get TLDs only",
- urls:flag "-H --host"
- :description "Get hosts only",
- urls:flag "-f --full"
- :description "Show piecewise urls as processed by Rspamd"
+ urls:flag "-t --tld"
+ :description "Get TLDs only",
+ urls:flag "-H --host"
+ :description "Get hosts only",
+ urls:flag "-f --full"
+ :description "Show piecewise urls as processed by Rspamd"
)
urls:flag "-u --unique"
@@ -135,75 +135,75 @@ urls:flag "-r --reverse"
:description "Reverse sort order"
local modify = parser:command "modify mod m"
- :description "Modifies MIME message"
+ :description "Modifies MIME message"
modify:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
modify:option "-a --add-header"
- :description "Adds specific header"
- :argname "<header=value>"
- :count "*"
+ :description "Adds specific header"
+ :argname "<header=value>"
+ :count "*"
modify:option "-r --remove-header"
- :description "Removes specific header (all occurrences)"
- :argname "<header>"
- :count "*"
+ :description "Removes specific header (all occurrences)"
+ :argname "<header>"
+ :count "*"
modify:option "-R --rewrite-header"
- :description "Rewrites specific header, uses Lua string.format pattern"
- :argname "<header=pattern>"
- :count "*"
+ :description "Rewrites specific header, uses Lua string.format pattern"
+ :argname "<header=pattern>"
+ :count "*"
modify:option "-t --text-footer"
- :description "Adds footer to text/plain parts from a specific file"
- :argname "<file>"
+ :description "Adds footer to text/plain parts from a specific file"
+ :argname "<file>"
modify:option "-H --html-footer"
- :description "Adds footer to text/html parts from a specific file"
- :argname "<file>"
+ :description "Adds footer to text/html parts from a specific file"
+ :argname "<file>"
local strip = parser:command "strip"
- :description "Strip attachments from a message"
+ :description "Strip attachments from a message"
strip:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
strip:flag "-i --keep-images"
- :description "Keep images"
+ :description "Keep images"
strip:option "--min-text-size"
- :description "Minimal text size to keep"
- :argname "<size>"
- :convert(tonumber)
- :default(0)
+ :description "Minimal text size to keep"
+ :argname "<size>"
+ :convert(tonumber)
+ :default(0)
strip:option "--max-text-size"
- :description "Max text size to keep"
- :argname "<size>"
- :convert(tonumber)
- :default(math.huge)
+ :description "Max text size to keep"
+ :argname "<size>"
+ :convert(tonumber)
+ :default(math.huge)
local anonymize = parser:command "anonymize"
- :description "Try to remove sensitive information from a message"
+ :description "Try to remove sensitive information from a message"
anonymize:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
anonymize:option "--exclude-header -X"
- :description "Exclude specific headers from anonymization"
- :argname "<header>"
- :count "*"
+ :description "Exclude specific headers from anonymization"
+ :argname "<header>"
+ :count "*"
anonymize:option "--include-header -I"
- :description "Include specific headers from anonymization"
- :argname "<header>"
- :count "*"
+ :description "Include specific headers from anonymization"
+ :argname "<header>"
+ :count "*"
anonymize:flag "--gpt"
- :description "Use LLM model for anonymization (requires GPT plugin to be configured)"
+ :description "Use LLM model for anonymization (requires GPT plugin to be configured)"
anonymize:option "--model"
- :description "Model to use for anonymization"
- :argname "<model>"
+ :description "Model to use for anonymization"
+ :argname "<model>"
anonymize:option "--prompt"
- :description "Prompt to use for anonymization"
- :argname "<prompt>"
+ :description "Prompt to use for anonymization"
+ :argname "<prompt>"
local sign = parser:command "sign"
- :description "Performs DKIM signing"
+ :description "Performs DKIM signing"
sign:argument "file"
:description "File to process"
:argname "<file>"
@@ -225,33 +225,33 @@ sign:option "-t --type"
:description "ARC or DKIM signing"
:argname("<arc|dkim>")
:convert {
- ['arc'] = 'arc',
- ['dkim'] = 'dkim',
-}
+ ['arc'] = 'arc',
+ ['dkim'] = 'dkim',
+ }
:default 'dkim'
sign:option "-o --output"
:description "Output format"
:argname("<message|signature>")
:convert {
- ['message'] = 'message',
- ['signature'] = 'signature',
-}
+ ['message'] = 'message',
+ ['signature'] = 'signature',
+ }
:default 'message'
local dump = parser:command "dump"
- :description "Dumps a raw message in different formats"
+ :description "Dumps a raw message in different formats"
dump:argument "file"
:description "File to process"
:argname "<file>"
:args "+"
-- Duplicate format for convenience
dump:mutex(
- parser:flag "-j --json"
- :description "JSON output",
- parser:flag "-U --ucl"
- :description "UCL output",
- parser:flag "-M --messagepack"
- :description "MessagePack output"
+ parser:flag "-j --json"
+ :description "JSON output",
+ parser:flag "-U --ucl"
+ :description "UCL output",
+ parser:flag "-M --messagepack"
+ :description "MessagePack output"
)
dump:flag "-s --split"
:description "Split the output file contents such that no content is embedded"
@@ -260,7 +260,7 @@ dump:option "-o --outdir"
:description "Output directory"
:argname("<directory>")
-local function load_config(opts)
+local function load_config(opts, load_tokenizers)
local _r, err = rspamd_config:load_ucl(opts['config'])
if not _r then
@@ -273,6 +273,23 @@ local function load_config(opts)
rspamd_logger.errx('cannot process %s: %s', opts['config'], err)
os.exit(1)
end
+
+ -- Load custom tokenizers if requested
+ if load_tokenizers then
+ local success, tokenizer_err = rspamd_config:load_custom_tokenizers()
+ if not success then
+ rspamd_logger.errx('cannot load custom tokenizers: %s', tokenizer_err or 'unknown error')
+ -- Don't exit here as custom tokenizers are optional
+ rspamd_logger.warnx('proceeding without custom tokenizers')
+ end
+ end
+end
+
+-- Helper function to ensure proper cleanup of tokenizers
+local function cleanup_tokenizers()
+ if rspamd_config then
+ rspamd_config:unload_custom_tokenizers()
+ end
end
local function load_task(_, fname)
@@ -288,13 +305,13 @@ local function load_task(_, fname)
if not res then
parser:error(string.format('cannot read message from %s: %s', fname,
- task))
+ task))
return nil
end
if not task:process_message() then
parser:error(string.format('cannot read message from %s: %s', fname,
- 'failed to parse'))
+ 'failed to parse'))
return nil
end
@@ -335,7 +352,6 @@ local function print_elts(elts, opts, func)
io.write(ucl.to_format(elts, output_fmt(opts)))
else
fun.each(function(fname, elt)
-
if not opts.json and not opts.ucl then
if func then
elt = fun.map(func, elt)
@@ -357,7 +373,7 @@ local function extract_handler(opts)
if opts.words then
-- Enable stemming and urls detection
- load_config(opts)
+ load_config(opts, true) -- Load with custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
rspamd_config:init_subsystem('langdet')
end
@@ -372,39 +388,38 @@ local function extract_handler(opts)
if not opts.json and not opts.ucl then
table.insert(out,
- rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s',
- part:get_mimepart():get_digest():sub(1, 8),
- t,
- part:get_language(),
- part:get_length(), part:get_raw_length(),
- part:get_words_count()))
+ rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s',
+ part:get_mimepart():get_digest():sub(1, 8),
+ t,
+ part:get_language(),
+ part:get_length(), part:get_raw_length(),
+ part:get_words_count()))
table.insert(out,
- rspamd_logger.slog('Stats: %s',
- fun.foldl(function(acc, k, v)
- if acc ~= '' then
- return string.format('%s, %s:%s', acc, k, v)
- else
- return string.format('%s:%s', k, v)
- end
- end, '', part:get_stats())))
+ rspamd_logger.slog('Stats: %s',
+ fun.foldl(function(acc, k, v)
+ if acc ~= '' then
+ return string.format('%s, %s:%s', acc, k, v)
+ else
+ return string.format('%s:%s', k, v)
+ end
+ end, '', part:get_stats())))
end
end
end
local function maybe_print_mime_part_info(part, out)
if opts.part then
-
if not opts.json and not opts.ucl then
local mtype, msubtype = part:get_type()
local det_mtype, det_msubtype = part:get_detected_type()
table.insert(out,
- rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s',
- part:get_digest():sub(1, 8),
- mtype, msubtype,
- det_mtype, det_msubtype,
- part:get_filename(),
- part:get_detected_ext(),
- part:get_length()))
+ rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s',
+ part:get_digest():sub(1, 8),
+ mtype, msubtype,
+ det_mtype, det_msubtype,
+ part:get_filename(),
+ part:get_detected_ext(),
+ part:get_length()))
end
end
end
@@ -416,17 +431,17 @@ local function extract_handler(opts)
return table.concat(words, ' ')
else
return table.concat(
- fun.totable(
- fun.map(function(w)
- -- [1] - stemmed word
- -- [2] - normalised word
- -- [3] - raw word
- -- [4] - flags (table of strings)
- return string.format('%s|%s|%s(%s)',
- w[3], w[2], w[1], table.concat(w[4], ','))
- end, words)
- ),
- ' '
+ fun.totable(
+ fun.map(function(w)
+ -- [1] - stemmed word
+ -- [2] - normalised word
+ -- [3] - raw word
+ -- [4] - flags (table of strings)
+ return string.format('%s|%s|%s(%s)',
+ w[3], w[2], w[1], table.concat(w[4], ','))
+ end, words)
+ ),
+ ' '
)
end
end
@@ -443,7 +458,7 @@ local function extract_handler(opts)
if opts.words then
local how_words = opts['words_format'] or 'stem'
table.insert(out_elts[fname], 'meta_words: ' ..
- print_words(task:get_meta_words(how_words), how_words == 'full'))
+ print_words(task:get_meta_words(how_words), how_words == 'full'))
end
if opts.text or opts.html then
@@ -466,7 +481,7 @@ local function extract_handler(opts)
if opts.words then
local how_words = opts['words_format'] or 'stem'
table.insert(out_elts[fname], print_words(part:get_words(how_words),
- how_words == 'full'))
+ how_words == 'full'))
else
table.insert(out_elts[fname], tostring(part:get_content(how)))
end
@@ -480,7 +495,7 @@ local function extract_handler(opts)
if opts.words then
local how_words = opts['words_format'] or 'stem'
table.insert(out_elts[fname], print_words(part:get_words(how_words),
- how_words == 'full'))
+ how_words == 'full'))
else
if opts.structure then
local hc = part:get_html()
@@ -489,11 +504,11 @@ local function extract_handler(opts)
local fun = require "fun"
if type(elt) == 'table' then
return table.concat(fun.totable(
- fun.map(
- function(t)
- return rspamd_logger.slog("%s", t)
- end,
- elt)), '\n')
+ fun.map(
+ function(t)
+ return rspamd_logger.slog("%s", t)
+ end,
+ elt)), '\n')
else
return rspamd_logger.slog("%s", elt)
end
@@ -524,7 +539,7 @@ local function extract_handler(opts)
if opts.invisible then
local hc = part:get_html()
table.insert(out_elts[fname], string.format('invisible content: %s',
- tostring(hc:get_invisible())))
+ tostring(hc:get_invisible())))
end
end
end
@@ -544,13 +559,18 @@ local function extract_handler(opts)
for _, task in ipairs(tasks) do
task:destroy()
end
+
+ -- Cleanup custom tokenizers if they were loaded
+ if opts.words then
+ cleanup_tokenizers()
+ end
end
local function stat_handler(opts)
local fun = require "fun"
local out_elts = {}
- load_config(opts)
+ load_config(opts, true) -- Load with custom tokenizers for stat generation
rspamd_url.init(rspamd_config:get_tld_path())
rspamd_config:init_subsystem('langdet,stat') -- Needed to gen stat tokens
@@ -571,10 +591,10 @@ local function stat_handler(opts)
out_elts[fname] = bt
process_func = function(e)
return string.format('%s (%d): "%s"+"%s", [%s]', e.data, e.win, e.t1 or "",
- e.t2 or "", table.concat(fun.totable(
- fun.map(function(k)
- return k
- end, e.flags)), ","))
+ e.t2 or "", table.concat(fun.totable(
+ fun.map(function(k)
+ return k
+ end, e.flags)), ","))
end
elseif opts.fuzzy then
local parts = task:get_parts() or {}
@@ -601,16 +621,16 @@ local function stat_handler(opts)
digest = digest,
shingles = shingles,
type = string.format('%s/%s',
- ({ part:get_type() })[1],
- ({ part:get_type() })[2])
+ ({ part:get_type() })[1],
+ ({ part:get_type() })[2])
})
else
table.insert(out_elts[fname], {
digest = part:get_digest(),
file = part:get_filename(),
type = string.format('%s/%s',
- ({ part:get_type() })[1],
- ({ part:get_type() })[2])
+ ({ part:get_type() })[1],
+ ({ part:get_type() })[2])
})
end
end
@@ -621,10 +641,13 @@ local function stat_handler(opts)
end
print_elts(out_elts, opts, process_func)
+
+ -- Cleanup custom tokenizers
+ cleanup_tokenizers()
end
local function urls_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- URLs don't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
local out_elts = {}
@@ -764,7 +787,7 @@ local function newline(task)
end
local function modify_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Modification doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
local function read_file(file)
@@ -804,10 +827,10 @@ local function modify_handler(opts)
if hname == name then
local new_value = string.format(hpattern, hdr.decoded)
new_value = string.format('%s:%s%s',
- name, hdr.separator,
- rspamd_util.fold_header(name,
- rspamd_util.mime_header_encode(new_value),
- task:get_newlines_type()))
+ name, hdr.separator,
+ rspamd_util.fold_header(name,
+ rspamd_util.mime_header_encode(new_value),
+ task:get_newlines_type()))
out[#out + 1] = new_value
return
end
@@ -816,12 +839,12 @@ local function modify_handler(opts)
if rewrite.need_rewrite_ct then
if name:lower() == 'content-type' then
local nct = string.format('%s: %s/%s; charset=utf-8',
- 'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype)
+ 'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype)
out[#out + 1] = nct
return
elseif name:lower() == 'content-transfer-encoding' then
out[#out + 1] = string.format('%s: %s',
- 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
+ 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
seen_cte = true
return
end
@@ -837,13 +860,13 @@ local function modify_handler(opts)
if hname and hvalue then
out[#out + 1] = string.format('%s: %s', hname,
- rspamd_util.fold_header(hname, hvalue, task:get_newlines_type()))
+ rspamd_util.fold_header(hname, hvalue, task:get_newlines_type()))
end
end
if not seen_cte and rewrite.need_rewrite_ct then
out[#out + 1] = string.format('%s: %s',
- 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
+ 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
end
-- End of headers
@@ -883,7 +906,7 @@ local function modify_handler(opts)
end
local function sign_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Signing doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
local lua_dkim = require("lua_ffi").dkim
@@ -927,11 +950,11 @@ local function sign_handler(opts)
io.flush()
else
local dkim_hdr = string.format('%s: %s%s',
- 'DKIM-Signature',
- rspamd_util.fold_header('DKIM-Signature',
- rspamd_util.mime_header_encode(sig),
- task:get_newlines_type()),
- newline(task))
+ 'DKIM-Signature',
+ rspamd_util.fold_header('DKIM-Signature',
+ rspamd_util.mime_header_encode(sig),
+ task:get_newlines_type()),
+ newline(task))
io.write(dkim_hdr)
io.flush()
task:get_content():save_in_file(1)
@@ -942,7 +965,7 @@ local function sign_handler(opts)
end
local function strip_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Stripping doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
for _, fname in ipairs(opts.file) do
@@ -998,7 +1021,7 @@ local function strip_handler(opts)
end
local function anonymize_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Anonymization doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
for _, fname in ipairs(opts.file) do
@@ -1103,7 +1126,7 @@ local function get_dump_content(task, opts, fname)
end
local function dump_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Dumping doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
for _, fname in ipairs(opts.file) do
diff --git a/lualib/rspamadm/statistics_dump.lua b/lualib/rspamadm/statistics_dump.lua
index 6bc045850..6a08d11fd 100644
--- a/lualib/rspamadm/statistics_dump.lua
+++ b/lualib/rspamadm/statistics_dump.lua
@@ -42,6 +42,12 @@ parser:option "-c --config"
:argname("<cfg>")
:default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf")
+parser:option "-b --batch-size"
+ :description "Number of entries to process at once"
+ :argname("<elts>")
+ :convert(tonumber)
+ :default(1000)
+
-- Extract subcommand
local dump = parser:command "dump d"
:description "Dump bayes statistics"
@@ -54,7 +60,7 @@ dump:mutex(
dump:flag "-c --compress"
:description "Compress output"
dump:option "-b --batch-size"
- :description "Number of entires to process at once"
+ :description "Number of entries to process at once"
:argname("<elts>")
:convert(tonumber)
:default(1000)
@@ -68,12 +74,12 @@ restore:argument "file"
:argname "<file>"
:args "*"
restore:option "-b --batch-size"
- :description "Number of entires to process at once"
+ :description "Number of entries to process at once"
:argname("<elts>")
:convert(tonumber)
:default(1000)
restore:option "-m --mode"
- :description "Number of entires to process at once"
+ :description "Restore mode: append, subtract or replace"
:argname("<append|subtract|replace>")
:convert {
['append'] = 'append',
@@ -287,11 +293,11 @@ local function dump_pattern(conn, pattern, opts, out, key)
-- Do not write the last chunk of out as it will be processed afterwards
if cursor ~= 0 then
if opts.cdb then
- dump_out(out, opts, false)
- clear_fcn(out)
- else
dump_cdb(out, opts, false, key)
out[key].elts = {}
+ else
+ dump_out(out, opts, false)
+ clear_fcn(out)
end
elseif opts.cdb then
dump_cdb(out, opts, true, key)
@@ -541,4 +547,4 @@ return {
aliases = { 'stat_dump', 'bayes_dump' },
handler = handler,
description = parser._description
-} \ No newline at end of file
+}