diff options
Diffstat (limited to 'lualib')
-rw-r--r-- | lualib/lua_bayes_redis.lua | 67 | ||||
-rw-r--r-- | lualib/lua_cache.lua | 475 | ||||
-rw-r--r-- | lualib/lua_cfg_transform.lua | 22 | ||||
-rw-r--r-- | lualib/lua_dkim_tools.lua | 112 | ||||
-rw-r--r-- | lualib/lua_magic/patterns.lua | 17 | ||||
-rw-r--r-- | lualib/lua_magic/types.lua | 7 | ||||
-rw-r--r-- | lualib/lua_maps.lua | 100 | ||||
-rw-r--r-- | lualib/lua_maps_expressions.lua | 2 | ||||
-rw-r--r-- | lualib/lua_mime.lua | 24 | ||||
-rw-r--r-- | lualib/lua_mime_types.lua | 2 | ||||
-rw-r--r-- | lualib/lua_redis.lua | 54 | ||||
-rw-r--r-- | lualib/lua_scanners/cloudmark.lua | 52 | ||||
-rw-r--r-- | lualib/lua_scanners/icap.lua | 9 | ||||
-rw-r--r-- | lualib/lua_util.lua | 51 | ||||
-rw-r--r-- | lualib/plugins/neural.lua | 2 | ||||
-rw-r--r-- | lualib/plugins/rbl.lua | 5 | ||||
-rw-r--r-- | lualib/redis_scripts/bayes_cache_learn.lua | 17 | ||||
-rw-r--r-- | lualib/redis_scripts/bayes_classify.lua | 75 | ||||
-rw-r--r-- | lualib/redis_scripts/bayes_learn.lua | 55 | ||||
-rw-r--r-- | lualib/redis_scripts/neural_save_unlock.lua | 13 | ||||
-rw-r--r-- | lualib/rspamadm/dmarc_report.lua | 18 | ||||
-rw-r--r-- | lualib/rspamadm/mime.lua | 401 | ||||
-rw-r--r-- | lualib/rspamadm/statistics_dump.lua | 20 |
23 files changed, 1168 insertions, 432 deletions
diff --git a/lualib/lua_bayes_redis.lua b/lualib/lua_bayes_redis.lua index 782e6fc47..a7af80bf1 100644 --- a/lualib/lua_bayes_redis.lua +++ b/lualib/lua_bayes_redis.lua @@ -25,27 +25,44 @@ local ucl = require "ucl" local N = "bayes" local function gen_classify_functor(redis_params, classify_script_id) - return function(task, expanded_key, id, is_spam, stat_tokens, callback) - + return function(task, expanded_key, id, class_labels, stat_tokens, callback) local function classify_redis_cb(err, data) lua_util.debugm(N, task, 'classify redis cb: %s, %s', err, data) if err then callback(task, false, err) else - callback(task, true, data[1], data[2], data[3], data[4]) + -- Pass the raw data table to the C++ callback for processing + -- The C++ callback will handle both binary and multi-class formats + callback(task, true, data) + end + end + + -- Determine class labels to send to Redis script + local script_class_labels + if type(class_labels) == "table" then + -- Use simple comma-separated string instead of messagepack + script_class_labels = "TABLE:" .. table.concat(class_labels, ",") + else + -- Single class label or boolean compatibility + if class_labels == true or class_labels == "true" then + script_class_labels = "S" -- spam + elseif class_labels == false or class_labels == "false" then + script_class_labels = "H" -- ham + else + script_class_labels = class_labels -- string class label end end lua_redis.exec_redis_script(classify_script_id, { task = task, is_write = false, key = expanded_key }, - classify_redis_cb, { expanded_key, stat_tokens }) + classify_redis_cb, { expanded_key, script_class_labels, stat_tokens }) end end local function gen_learn_functor(redis_params, learn_script_id) - return function(task, expanded_key, id, is_spam, symbol, is_unlearn, stat_tokens, callback, maybe_text_tokens) + return function(task, expanded_key, id, class_label, symbol, is_unlearn, stat_tokens, callback, maybe_text_tokens) local function learn_redis_cb(err, data) - lua_util.debugm(N, task, 'learn redis cb: %s, %s', err, data) + lua_util.debugm(N, task, 'learn redis cb: %s, %s for class %s', err, data, class_label) if err then callback(task, false, err) else @@ -53,17 +70,24 @@ local function gen_learn_functor(redis_params, learn_script_id) end end + -- Convert class_label for backward compatibility + local script_class_label = class_label + if class_label == true or class_label == "true" then + script_class_label = "S" -- spam + elseif class_label == false or class_label == "false" then + script_class_label = "H" -- ham + end + if maybe_text_tokens then lua_redis.exec_redis_script(learn_script_id, { task = task, is_write = true, key = expanded_key }, learn_redis_cb, - { expanded_key, tostring(is_spam), symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens }) + { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens }) else lua_redis.exec_redis_script(learn_script_id, { task = task, is_write = true, key = expanded_key }, - learn_redis_cb, { expanded_key, tostring(is_spam), symbol, tostring(is_unlearn), stat_tokens }) + learn_redis_cb, { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens }) end - end end @@ -112,8 +136,7 @@ end --- @param classifier_ucl ucl of the classifier config --- @param statfile_ucl ucl of the statfile config --- @return a pair of (classify_functor, learn_functor) or `nil` in case of error -exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol, is_spam, ev_base, stat_periodic_cb) - +exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol, class_label, ev_base, stat_periodic_cb) local redis_params = load_redis_params(classifier_ucl, statfile_ucl) if not redis_params then @@ -137,7 +160,6 @@ exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol, if ev_base then rspamd_config:add_periodic(ev_base, 0.0, function(cfg, _) - local function stat_redis_cb(err, data) lua_util.debugm(N, cfg, 'stat redis cb: %s, %s', err, data) @@ -162,11 +184,22 @@ exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol, end end + -- Convert class_label to learn key + local learn_key + if class_label == true or class_label == "true" or class_label == "S" then + learn_key = "learns_spam" + elseif class_label == false or class_label == "false" or class_label == "H" then + learn_key = "learns_ham" + else + -- For other class labels, use learns_<class_label> + learn_key = "learns_" .. string.lower(tostring(class_label)) + end + lua_redis.exec_redis_script(stat_script_id, { ev_base = ev_base, cfg = cfg, is_write = false }, stat_redis_cb, { tostring(cursor), symbol, - is_spam and "learns_spam" or "learns_ham", + learn_key, tostring(max_users) }) return statfile_ucl.monitor_timeout or classifier_ucl.monitor_timeout or 30.0 end) @@ -178,7 +211,6 @@ end local function gen_cache_check_functor(redis_params, check_script_id, conf) local packed_conf = ucl.to_format(conf, 'msgpack') return function(task, cache_id, callback) - local function classify_redis_cb(err, data) lua_util.debugm(N, task, 'check cache redis cb: %s, %s (%s)', err, data, type(data)) if err then @@ -201,17 +233,16 @@ end local function gen_cache_learn_functor(redis_params, learn_script_id, conf) local packed_conf = ucl.to_format(conf, 'msgpack') - return function(task, cache_id, is_spam) + return function(task, cache_id, class_name, class_id) local function learn_redis_cb(err, data) lua_util.debugm(N, task, 'learn_cache redis cb: %s, %s', err, data) end - lua_util.debugm(N, task, 'try to learn cache: %s', cache_id) + lua_util.debugm(N, task, 'try to learn cache: %s as %s (id=%s)', cache_id, class_name, class_id) lua_redis.exec_redis_script(learn_script_id, { task = task, is_write = true, key = cache_id }, learn_redis_cb, - { cache_id, is_spam and "1" or "0", packed_conf }) - + { cache_id, tostring(class_id), packed_conf }) end end diff --git a/lualib/lua_cache.lua b/lualib/lua_cache.lua new file mode 100644 index 000000000..c87a9dc78 --- /dev/null +++ b/lualib/lua_cache.lua @@ -0,0 +1,475 @@ +--[[ +Copyright (c) 2025, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +--[[[ +-- @module lua_cache +-- This module provides a Redis-based caching API for Rspamd with support for +-- concurrent operations across multiple workers. It includes features like +-- distributed locking via PENDING markers, automatic key hashing, +-- configurable serialization formats, and TTL management. +-- +@example +local redis_cache = require "lua_cache" +local redis_params = redis_lib.parse_redis_server('reputation') + +-- Create cache context +local cache_context = redis_cache.create_cache_context(redis_params, { + cache_prefix = "rspamd_reputation", + cache_ttl = 86400, -- 1 day + cache_format = "json", + cache_hash_len = 16, + cache_use_hashing = true +}) + +-- Example usage in a task +local function process_url_reputation(task, url) + local cache_key = url:get_tld() + + -- Try to get data from cache first + redis_cache.cache_get(task, cache_key, cache_context, 5.0, + -- This callback is called on cache miss + function(task) + -- Perform expensive reputation lookup + local reputation = calculate_reputation(task, url) + + -- Store result in cache for future use + redis_cache.cache_set(task, cache_key, { + score = reputation.score, + categories = reputation.categories, + timestamp = os.time() + }, cache_context) + + -- Use the result + apply_reputation_rules(task, url, reputation) + end, + -- This callback is called when cache data is available + function(task, err, data) + if err then + logger.errx(task, "Cache error for %s: %s", cache_key, err) + return + end + + -- Use the cached data + apply_reputation_rules(task, url, data) + end + ) +end +--]] + +local logger = require "rspamd_logger" +local ucl = require "ucl" +local lua_util = require "lua_util" +local rspamd_util = require "rspamd_util" +local lua_redis = require "lua_redis" +local hasher = require "rspamd_cryptobox_hash" + +local N = "lua_cache" +local exports = {} + +-- Default options +local default_opts = { + cache_prefix = "rspamd_cache", + cache_ttl = 3600, -- 1 hour + cache_probes = 5, -- Number of times to check a pending key + cache_format = "json", -- Serialization format + cache_hash_len = 16, -- Number of hex symbols to use for hashed keys + cache_use_hashing = false -- Whether to hash keys by default +} + +-- Create a hash of the key using the configured length +local function hash_key(key, hash_len) + local h = hasher.create(key) + local hex = h:hex() + + if hash_len and hash_len > 0 and hash_len < #hex then + return string.sub(hex, 1, hash_len) + end + + return hex +end + +-- Get the appropriate key based on hashing configuration +local function get_cache_key(raw_key, cache_context, force_hashing) + -- Determine whether to hash based on context settings and force parameter + local should_hash = force_hashing + if should_hash == nil then + should_hash = cache_context.opts.cache_use_hashing + end + + if should_hash then + lua_util.debugm(N, rspamd_config, "hashing key '%s' with hash length %s", + raw_key, cache_context.opts.cache_hash_len) + return hash_key(raw_key, cache_context.opts.cache_hash_len) + else + return raw_key + end +end + +-- Create a caching context with the provided options +local function create_cache_context(redis_params, opts, module_name) + if not redis_params then + return nil, "Redis parameters must be provided" + end + + local cache_context = {} + cache_context.redis_params = redis_params + + -- Process and merge configuration options + cache_context.opts = lua_util.override_defaults(default_opts, opts) + cache_context.N = module_name or N + + -- Register Redis prefix + lua_redis.register_prefix(cache_context.opts.cache_prefix, + "caching", + "Cache API prefix") + + lua_util.debugm(N, rspamd_config, "registered redis prefix: %s", cache_context.opts.cache_prefix) + + -- Remove cache related options from opts table + if opts then + lua_util.debugm(N, rspamd_config, "removing cache options from original opts table") + opts.cache_prefix = nil + opts.cache_ttl = nil + opts.cache_probes = nil + opts.cache_format = nil + opts.cache_hash_len = nil + opts.cache_use_hashing = nil + end + + -- Set serialization and deserialization functions + if cache_context.opts.cache_format == "messagepack" then + lua_util.debugm(cache_context.N, rspamd_config, "using messagepack for serialization") + + cache_context.encode = function(data) + return ucl.to_format(data, 'msgpack') + end + + cache_context.decode = function(raw_data) + local ucl_parser = ucl.parser() + local ok, ucl_err = ucl_parser:parse_text(raw_data, 'messagepack') + if not ok then + lua_util.debugm(cache_context.N, rspamd_config, "failed to parse messagepack data: %s", ucl_err) + return nil + end + return ucl_parser:get_object() + end + else + -- Default to JSON + lua_util.debugm(cache_context.N, rspamd_config, "using json for serialization") + + cache_context.encode = function(data) + return ucl.to_format(data, 'json') + end + + cache_context.decode = function(raw_data) + local ucl_parser = ucl.parser() + local ok, ucl_err = ucl_parser:parse_text(raw_data) + if not ok then + lua_util.debugm(cache_context.N, rspamd_config, "failed to parse json data: %s", ucl_err) + return nil + end + return ucl_parser:get_object() + end + end + + lua_util.debugm(cache_context.N, rspamd_config, "cache context created: %s", cache_context.opts) + return cache_context +end + +-- Encode data for storage in Redis with proper formatting +local function encode_data(data, cache_context) + lua_util.debugm(cache_context.N, rspamd_config, "encoding data using %s format", cache_context.opts.cache_format) + return cache_context.encode(data) +end + +-- Decode data from Redis with proper formatting +local function decode_data(data, cache_context) + if not data then + lua_util.debugm(cache_context.N, rspamd_config, "cannot decode nil data") + return nil + end + lua_util.debugm(cache_context.N, rspamd_config, "decoding data using %s format", cache_context.opts.cache_format) + return cache_context.decode(data) +end + +-- Check if a value is a PENDING marker and extract its details +local function parse_pending_value(value, cache_context) + if type(value) ~= 'string' then + lua_util.debugm(cache_context.N, rspamd_config, "value is not a string, cannot be a pending marker") + return nil + end + + -- Check if the value starts with PENDING: + if string.sub(value, 1, 8) ~= "PENDING:" then + lua_util.debugm(cache_context.N, rspamd_config, "value doesn't start with PENDING: prefix") + return nil + end + + lua_util.debugm(cache_context.N, rspamd_config, "found PENDING marker, extracting data") + local pending_data = string.sub(value, 9) + return decode_data(pending_data, cache_context) +end + +-- Create a pending marker with hostname and timeout +local function create_pending_marker(timeout, cache_context) + local hostname = rspamd_util.get_hostname() + local pending_data = { + hostname = hostname, + timeout = timeout, + timestamp = os.time() + } + + lua_util.debugm(cache_context.N, rspamd_config, "creating PENDING marker for host %s, timeout %s", + hostname, timeout) + + return "PENDING:" .. encode_data(pending_data, cache_context) +end + +-- Check cache and handle the result appropriately +local function cache_get(task, key, cache_context, timeout, callback_uncached, callback_data) + if not task or not key or not cache_context or not callback_uncached or not callback_data then + logger.errx(task, "missing required parameters for cache_get") + return false + end + + local full_key = cache_context.opts.cache_prefix .. "_" .. get_cache_key(key, cache_context, false) + lua_util.debugm(cache_context.N, task, "cache lookup for key: %s (%s)", key, full_key) + + -- Function to check a pending key + local function check_pending(pending_info) + local probe_count = 0 + local probe_interval = timeout / (cache_context.opts.cache_probes or 5) + + lua_util.debugm(cache_context.N, task, "setting up probes for pending key %s, interval: %s seconds", + full_key, probe_interval) + + -- Set up a timer to probe the key + local function probe_key() + probe_count = probe_count + 1 + lua_util.debugm(cache_context.N, task, "probe #%s/%s for pending key %s", + probe_count, cache_context.opts.cache_probes, full_key) + + if probe_count >= cache_context.opts.cache_probes then + logger.infox(task, "maximum probes reached for key %s, considering it failed", full_key) + lua_util.debugm(cache_context.N, task, "maximum probes reached for key %s, giving up", full_key) + callback_data(task, "timeout waiting for pending key", nil) + return + end + + lua_util.debugm(cache_context.N, task, "probing redis for key %s", full_key) + lua_redis.redis_make_request(task, cache_context.redis_params, key, false, + function(err, data) + if err then + logger.errx(task, "redis error while probing key %s: %s", full_key, err) + lua_util.debugm(cache_context.N, task, "redis error during probe: %s, retrying later", err) + task:add_timer(probe_interval, probe_key) + return + end + + if not data or type(data) == 'userdata' then + lua_util.debugm(cache_context.N, task, "pending key %s disappeared, calling uncached handler", full_key) + callback_uncached(task) + return + end + + local pending = parse_pending_value(data, cache_context) + if pending then + lua_util.debugm(cache_context.N, task, "key %s still pending (host: %s), retrying later", + full_key, pending.hostname) + task:add_timer(probe_interval, probe_key) + else + lua_util.debugm(cache_context.N, task, "pending key %s resolved to actual data", full_key) + callback_data(task, nil, decode_data(data, cache_context)) + end + end, + 'GET', { full_key } + ) + end + + -- Start the first probe after the initial probe interval + lua_util.debugm(cache_context.N, task, "scheduling first probe for %s in %s seconds", + full_key, probe_interval) + task:add_timer(probe_interval, probe_key) + end + + -- Initial cache lookup + lua_util.debugm(cache_context.N, task, "making initial redis GET request for key: %s", full_key) + lua_redis.redis_make_request(task, cache_context.redis_params, key, false, + function(err, data) + if err then + logger.errx(task, "redis error looking up key %s: %s", full_key, err) + lua_util.debugm(cache_context.N, task, "redis error: %s, calling uncached handler", err) + callback_uncached(task) + return + end + + if not data or type(data) == 'userdata' then + -- Key not found, set pending and call the uncached callback + lua_util.debugm(cache_context.N, task, "key %s not found in cache, creating pending marker", full_key) + local pending_marker = create_pending_marker(timeout, cache_context) + + lua_util.debugm(cache_context.N, task, "setting pending marker for key %s with TTL %s", + full_key, timeout * 2) + lua_redis.redis_make_request(task, cache_context.redis_params, key, true, + function(set_err, set_data) + if set_err then + logger.errx(task, "redis error setting pending marker for %s: %s", full_key, set_err) + lua_util.debugm(cache_context.N, task, "failed to set pending marker: %s", set_err) + else + lua_util.debugm(cache_context.N, task, "successfully set pending marker for %s", full_key) + end + lua_util.debugm(cache_context.N, task, "calling uncached handler for %s", full_key) + callback_uncached(task) + end, + 'SETEX', { full_key, tostring(timeout * 2), pending_marker } + ) + else + -- Key found, check if it's a pending marker or actual data + local pending = parse_pending_value(data, cache_context) + + if pending then + -- Key is being processed by another worker + lua_util.debugm(cache_context.N, task, "key %s is pending on host %s, waiting for result", + full_key, pending.hostname) + check_pending(pending) + else + -- Extend TTL and return data + lua_util.debugm(cache_context.N, task, "found cached data for key %s, extending TTL to %s", + full_key, cache_context.opts.cache_ttl) + lua_redis.redis_make_request(task, cache_context.redis_params, key, true, + function(expire_err, _) + if expire_err then + logger.errx(task, "redis error extending TTL for %s: %s", full_key, expire_err) + lua_util.debugm(cache_context.N, task, "failed to extend TTL: %s", expire_err) + else + lua_util.debugm(cache_context.N, task, "successfully extended TTL for %s", full_key) + end + end, + 'EXPIRE', { full_key, tostring(cache_context.opts.cache_ttl) } + ) + + lua_util.debugm(cache_context.N, task, "returning cached data for key %s", full_key) + callback_data(task, nil, decode_data(data, cache_context)) + end + end + end, + 'GET', { full_key } + ) + + return true +end + +-- Save data to the cache +local function cache_set(task, key, data, cache_context) + if not task or not key or not data or not cache_context then + logger.errx(task, "missing required parameters for cache_set") + return false + end + + local full_key = cache_context.opts.cache_prefix .. "_" .. get_cache_key(key, cache_context, false) + lua_util.debugm(cache_context.N, task, "caching data for key: %s (%s) with TTL: %s", + full_key, key, cache_context.opts.cache_ttl) + + local encoded_data = encode_data(data, cache_context) + + -- Store the data with expiration + lua_util.debugm(cache_context.N, task, "making redis SETEX request for key: %s", full_key) + return lua_redis.redis_make_request(task, cache_context.redis_params, key, true, + function(err, result) + if err then + logger.errx(task, "redis error setting cached data for %s: %s", full_key, err) + lua_util.debugm(cache_context.N, task, "failed to cache data: %s", err) + else + lua_util.debugm(cache_context.N, task, "successfully cached data for key %s", full_key) + end + end, + 'SETEX', { full_key, tostring(cache_context.opts.cache_ttl), encoded_data } + ) +end + +-- Delete a cache entry +local function cache_del(task, key, cache_context) + if not task or not key or not cache_context then + logger.errx(task, "missing required parameters for cache_del") + return false + end + + local full_key = cache_context.opts.cache_prefix .. "_" .. get_cache_key(key, cache_context, false) + lua_util.debugm(cache_context.N, task, "deleting cache key: %s", full_key) + + return lua_redis.redis_make_request(task, cache_context.redis_params, key, true, + function(err, result) + if err then + logger.errx(task, "redis error deleting cache key %s: %s", full_key, err) + lua_util.debugm(cache_context.N, task, "failed to delete cache key: %s", err) + else + local count = tonumber(result) or 0 + lua_util.debugm(cache_context.N, task, "successfully deleted cache key %s (%s keys removed)", + full_key, count) + end + end, + 'DEL', { full_key } + ) +end + +-- Export the API functions +---[[[ +-- @function lua_cache.create_cache_context(redis_params, opts, module_name) +-- Creates a Redis caching context with specified parameters and options +-- @param {table} redis_params Redis connection parameters (required) +-- @param {table} opts Optional configuration parameters: +-- * `cache_prefix`: Key prefix for Redis (default: "rspamd_cache") +-- * `cache_ttl`: TTL in seconds for cached entries (default: 3600) +-- * `cache_probes`: Number of times to check pending keys (default: 5) +-- * `cache_format`: Serialization format - "json" or "messagepack" (default: "json") +-- * `cache_hash_len`: Number of hex symbols for hashed keys (default: 16) +-- * `cache_use_hashing`: Whether to hash keys by default (default: true) +-- @return {table} Cache context or nil + error message on failure +--]] +exports.create_cache_context = create_cache_context +---[[[ +-- @function รง.cache_get(task, key, cache_context, timeout, callback_uncached, callback_data) +-- Retrieves data from cache, handling pending states and cache misses appropriately +-- @param {rspamd_task} task Current task (required) +-- @param {string} key Cache key (required) +-- @param {table} cache_context Redis cache context from create_cache_context (required) +-- @param {number} timeout Timeout for pending operations in seconds (required) +-- @param {function} callback_uncached Function to call on cache miss: callback_uncached(task) (required) +-- @param {function} callback_data Function to call when data is available: callback_data(task, err, data) (required) +-- @return {boolean} true if request was initiated successfully, false otherwise +--]] +exports.cache_get = cache_get +---[[[ +-- @function lua_cache.cache_set(task, key, data, cache_context) +-- Stores data in the cache with the configured TTL +-- @param {rspamd_task} task Current task (required) +-- @param {string} key Cache key (required) +-- @param {table} data Data to store in the cache (required) +-- @param {table} cache_context Redis cache context from create_cache_context (required) +-- @return {boolean} true if request was initiated successfully, false otherwise +--]] +exports.cache_set = cache_set +---[[[ +-- @function lua_cache.cache_del(task, key, cache_context) +-- Deletes data from the cache +-- @param {rspamd_task} task Current task (required) +-- @param {string} key Cache key (required) +-- @param {table} cache_context Redis cache context from create_cache_context (required) +-- @return {boolean} true if request was initiated successfully, false otherwise +--]] +exports.cache_del = cache_del + +return exports diff --git a/lualib/lua_cfg_transform.lua b/lualib/lua_cfg_transform.lua index 265ca34c0..ec11ef299 100644 --- a/lualib/lua_cfg_transform.lua +++ b/lualib/lua_cfg_transform.lua @@ -198,20 +198,22 @@ end local function symbol_transform(cfg, k, v) local groups = cfg:at('group') - -- first try to find any group where there is a definition of this symbol - for gr_n, gr in groups:pairs() do - local symbols = gr:at('symbols') - if symbols and symbols:at(k) then - -- We override group symbol with ungrouped symbol - logger.infox("overriding group symbol %s in the group %s", k, gr_n) - symbols[k] = lua_util.override_defaults(symbols:at(k):unwrap(), v:unwrap()) - return + if groups then + -- first try to find any group where there is a definition of this symbol + for gr_n, gr in groups:pairs() do + local symbols = gr:at('symbols') + if symbols and symbols:at(k) then + -- We override group symbol with ungrouped symbol + logger.infox("overriding group symbol %s in the group %s", k, gr_n) + symbols[k] = lua_util.override_defaults(symbols:at(k):unwrap(), v:unwrap()) + return + end end end -- Now check what Rspamd knows about this symbol local sym = rspamd_config:get_symbol(k) - if not sym or not sym.group then + if groups and (not sym or not sym.group) then -- Otherwise we just use group 'ungrouped' if not groups:at('ungrouped') then groups.ungrouped = { @@ -374,7 +376,7 @@ return function(cfg) local next_act = actions_order[j] if actions:at(next_act) and actions:at(next_act):type() == 'number' then local next_score = actions:at(next_act):unwrap() - if next_score <= score then + if type(score) == 'number' and type(next_score) == 'number' and next_score <= score then logger.errx(rspamd_config, 'invalid actions thresholds order: action %s (%s) must have lower ' .. 'score than action %s (%s)', act, score, next_act, next_score) ret = false diff --git a/lualib/lua_dkim_tools.lua b/lualib/lua_dkim_tools.lua index b7f520fae..69c9462b5 100644 --- a/lualib/lua_dkim_tools.lua +++ b/lualib/lua_dkim_tools.lua @@ -13,7 +13,7 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -]]-- +]] -- local exports = {} @@ -33,7 +33,7 @@ local function check_violation(N, task, domain) if task:has_symbol(sym_check) then local sym = task:get_symbol(sym_check)[1] logger.infox(task, 'skip signing for %s: violation %s found: %s', - domain, sym_check, sym.options) + domain, sym_check, sym.options) return false end @@ -92,7 +92,6 @@ local function parse_dkim_http_headers(N, task, settings) local key = task:get_request_header(headers.key_header) if not (domain and selector and key) then - logger.errx(task, 'missing required headers to sign email') return false, {} end @@ -258,14 +257,14 @@ local function prepare_dkim_signing(N, task, settings) -- OpenDKIM style if is_skip_sign() then lua_util.debugm(N, task, - 'skip signing: is_sign_network: %s, is_authed: %s, is_local: %s', - is_sign_networks, is_authed, is_local) + 'skip signing: is_sign_network: %s, is_authed: %s, is_local: %s', + is_sign_networks, is_authed, is_local) return false, {} end if not hfrom or not hfrom[1] or not hfrom[1].addr then lua_util.debugm(N, task, - 'signing_table: cannot get data when no header from is presented') + 'signing_table: cannot get data when no header from is presented') return false, {} end local sign_entry = settings.signing_table:get_key(hfrom[1].addr:lower()) @@ -273,7 +272,7 @@ local function prepare_dkim_signing(N, task, settings) if sign_entry then -- Check opendkim style entries lua_util.debugm(N, task, - 'signing_table: found entry for %s: %s', hfrom[1].addr, sign_entry) + 'signing_table: found entry for %s: %s', hfrom[1].addr, sign_entry) if sign_entry == '%' then sign_entry = hdom end @@ -291,7 +290,7 @@ local function prepare_dkim_signing(N, task, settings) if not selector then logger.errx(task, 'no selector defined for sign_entry %s, key_entry %s', - sign_entry, key_entry) + sign_entry, key_entry) return false, {} end @@ -305,11 +304,11 @@ local function prepare_dkim_signing(N, task, settings) if st:sub(1, 1) == '/' or st == './' or st == '..' then res.key = parts[2]:gsub('%%', hdom) lua_util.debugm(N, task, 'perform dkim signing for %s, selector=%s, domain=%s, key file=%s', - hdom, selector, res.domain, res.key) + hdom, selector, res.domain, res.key) else res.rawkey = parts[2] -- No sanity check here lua_util.debugm(N, task, 'perform dkim signing for %s, selector=%s, domain=%s, raw key used', - hdom, selector, res.domain) + hdom, selector, res.domain) end return true, { res } @@ -327,56 +326,56 @@ local function prepare_dkim_signing(N, task, settings) if st:sub(1, 1) == '/' or st == './' or st == '..' then res.key = parts[3]:gsub('%%', hdom) lua_util.debugm(N, task, 'perform dkim signing for %s, selector=%s, domain=%s, key file=%s', - hdom, selector, res.domain, res.key) + hdom, selector, res.domain, res.key) else res.rawkey = parts[3] -- No sanity check here lua_util.debugm(N, task, 'perform dkim signing for %s, selector=%s, domain=%s, raw key used', - hdom, selector, res.domain) + hdom, selector, res.domain) end return true, { res } else logger.errx(task, 'invalid key entry for sign entry %s: %s; when signing %s domain', - sign_entry, key_entry, hdom) + sign_entry, key_entry, hdom) return false, {} end elseif settings.use_vault then -- Sign table is presented, the rest is covered by vault lua_util.debugm(N, task, 'check vault for %s, by sign entry %s, key entry is missing', - hdom, sign_entry) + hdom, sign_entry) return true, { domain = sign_entry, vault = true } else logger.errx(task, 'missing key entry for sign entry %s; when signing %s domain', - sign_entry, hdom) + sign_entry, hdom) return false, {} end else logger.errx(task, 'cannot get key entry for signing entry %s, when signing %s domain', - sign_entry, hdom) + sign_entry, hdom) return false, {} end else lua_util.debugm(N, task, - 'signing_table: no entry for %s', hfrom[1].addr) + 'signing_table: no entry for %s', hfrom[1].addr) return false, {} end else if settings.use_domain_sign_networks and is_sign_networks then dkim_domain = get_dkim_domain('use_domain_sign_networks') lua_util.debugm(N, task, - 'sign_networks: use domain(%s) for signature: %s', - settings.use_domain_sign_networks, dkim_domain) + 'sign_networks: use domain(%s) for signature: %s', + settings.use_domain_sign_networks, dkim_domain) elseif settings.use_domain_sign_local and is_local then dkim_domain = get_dkim_domain('use_domain_sign_local') lua_util.debugm(N, task, 'local: use domain(%s) for signature: %s', - settings.use_domain_sign_local, dkim_domain) + settings.use_domain_sign_local, dkim_domain) elseif settings.use_domain_sign_inbound and not is_local and not auser then dkim_domain = get_dkim_domain('use_domain_sign_inbound') lua_util.debugm(N, task, 'inbound: use domain(%s) for signature: %s', - settings.use_domain_sign_inbound, dkim_domain) + settings.use_domain_sign_inbound, dkim_domain) elseif settings.use_domain_custom then if type(settings.use_domain_custom) == 'string' then -- Load custom function @@ -387,10 +386,10 @@ local function prepare_dkim_signing(N, task, settings) settings.use_domain_custom = res_or_err dkim_domain = settings.use_domain_custom(task) lua_util.debugm(N, task, 'use custom domain for signing: %s', - dkim_domain) + dkim_domain) else logger.errx(task, 'cannot load dkim domain custom script: invalid type: %s, expected function', - type(res_or_err)) + type(res_or_err)) settings.use_domain_custom = nil end else @@ -400,12 +399,12 @@ local function prepare_dkim_signing(N, task, settings) else dkim_domain = settings.use_domain_custom(task) lua_util.debugm(N, task, 'use custom domain for signing: %s', - dkim_domain) + dkim_domain) end else dkim_domain = get_dkim_domain('use_domain') lua_util.debugm(N, task, 'use domain(%s) for signature: %s', - settings.use_domain, dkim_domain) + settings.use_domain, dkim_domain) end end @@ -467,7 +466,7 @@ local function prepare_dkim_signing(N, task, settings) }) else lua_util.debugm(N, task, 'domain %s is not designated for vault', - dkim_domain) + dkim_domain) end else -- TODO: try every domain in the vault @@ -501,7 +500,7 @@ local function prepare_dkim_signing(N, task, settings) if ret then table.insert(p, k) lua_util.debugm(N, task, 'using mempool selector %s with key %s', - k.selector, k.key) + k.selector, k.key) end end @@ -530,11 +529,11 @@ local function prepare_dkim_signing(N, task, settings) if not settings.use_redis then insert_or_update_prop(N, task, p, 'key', - 'default path', settings.path) + 'default path', settings.path) end insert_or_update_prop(N, task, p, 'selector', - 'default selector', settings.selector) + 'default selector', settings.selector) if settings.check_violation then if not check_violation(N, task, p.domain) then @@ -543,7 +542,7 @@ local function prepare_dkim_signing(N, task, settings) end insert_or_update_prop(N, task, p, 'domain', 'dkim_domain', - dkim_domain) + dkim_domain) return #p > 0 and true or false, p end @@ -560,53 +559,53 @@ exports.sign_using_redis = function(N, task, settings, selectors, sign_func, err local function redis_key_cb(err, data) if err then err_func(string.format("cannot make request to load DKIM key for %s: %s", - rk, err)) + rk, err)) elseif type(data) ~= 'string' then lua_util.debugm(N, task, "missing DKIM key for %s", rk) else p.rawkey = data lua_util.debugm(N, task, 'found and parsed key for %s:%s in Redis', - p.domain, p.selector) + p.domain, p.selector) sign_func(task, p) end end local rret = lua_redis.redis_make_request(task, - settings.redis_params, -- connect params - rk, -- hash key - false, -- is write - redis_key_cb, --callback - 'HGET', -- command - { settings.key_prefix, rk } -- arguments + settings.redis_params, -- connect params + rk, -- hash key + false, -- is write + redis_key_cb, --callback + 'HGET', -- command + { settings.key_prefix, rk } -- arguments ) if not rret then err_func(task, - string.format("cannot make request to load DKIM key for %s", rk)) + string.format("cannot make request to load DKIM key for %s", rk)) end end for _, p in ipairs(selectors) do if settings.selector_prefix then logger.infox(task, "using selector prefix '%s' for domain '%s'", - settings.selector_prefix, p.domain); + settings.selector_prefix, p.domain); local function redis_selector_cb(err, data) if err or type(data) ~= 'string' then err_func(task, string.format("cannot make request to load DKIM selector for domain %s: %s", - p.domain, err)) + p.domain, err)) else try_redis_key(data, p) end end local rret = lua_redis.redis_make_request(task, - settings.redis_params, -- connect params - p.domain, -- hash key - false, -- is write - redis_selector_cb, --callback - 'HGET', -- command - { settings.selector_prefix, p.domain } -- arguments + settings.redis_params, -- connect params + p.domain, -- hash key + false, -- is write + redis_selector_cb, --callback + 'HGET', -- command + { settings.selector_prefix, p.domain } -- arguments ) if not rret then err_func(task, string.format("cannot make Redis request to load DKIM selector for domain %s", - p.domain)) + p.domain)) end else try_redis_key(p.selector, p) @@ -619,25 +618,25 @@ exports.sign_using_vault = function(N, task, settings, selector, sign_func, err_ local ucl = require "ucl" local full_url = string.format('%s/v1/%s/%s', - settings.vault_url, settings.vault_path or 'dkim', selector.domain) + settings.vault_url, settings.vault_path or 'dkim', selector.domain) local upstream_list = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), settings.vault_url) local function vault_callback(err, code, body, _) if code ~= 200 then err_func(task, string.format('cannot request data from the vault url: %s; %s (%s)', - full_url, err, body)) + full_url, err, body)) else local parser = ucl.parser() local res, parser_err = parser:parse_string(body) if not res then err_func(task, string.format('vault reply for %s (data=%s) cannot be parsed: %s', - full_url, body, parser_err)) + full_url, body, parser_err)) else local obj = parser:get_object() if not obj or not obj.data then err_func(task, string.format('vault reply for %s (data=%s) is invalid, no data', - full_url, body)) + full_url, body)) else local elts = obj.data.selectors or {} local errs = {} @@ -675,13 +674,13 @@ exports.sign_using_vault = function(N, task, settings, selector, sign_func, err_ alg = p.alg, } lua_util.debugm(N, task, 'found and parsed key for %s:%s in Vault', - dkim_sign_data.domain, dkim_sign_data.selector) + dkim_sign_data.domain, dkim_sign_data.selector) nvalid = nvalid + 1 sign_func(task, dkim_sign_data) end, fun.filter(is_selector_valid, elts)) for _, e in errs do lua_util.debugm(N, task, 'error found during processing Vault selectors: %s:%s', - e[1], e[2]) + e[1], e[2]) end if nvalid == 0 then @@ -707,7 +706,7 @@ exports.sign_using_vault = function(N, task, settings, selector, sign_func, err_ if not ret then err_func(task, string.format("cannot make HTTP request to load DKIM data domain %s", - selector.domain)) + selector.domain)) end end @@ -732,8 +731,7 @@ exports.process_signing_settings = function(N, settings, opts) selector_map = { 'map', 'DKIM selectors' }, signing_table = { 'glob', 'DKIM signing table' }, key_table = { 'glob', 'DKIM keys table' }, - vault_domains = { 'glob', 'DKIM signing domains in vault' }, - whitelisted_signers_map = { 'set', 'ARC trusted signers domains' } + vault_domains = { 'glob', 'DKIM signing domains in vault' } } for k, v in pairs(opts) do local maybe_map = maps_opts[k] diff --git a/lualib/lua_magic/patterns.lua b/lualib/lua_magic/patterns.lua index 971ddd95f..4a5abd8ce 100644 --- a/lualib/lua_magic/patterns.lua +++ b/lualib/lua_magic/patterns.lua @@ -466,6 +466,23 @@ local patterns = { }, } }, + heic = { + matches = { + { + -- HEIC/HEIF file format signature + -- Starts with ftyp followed by specific brand identifiers + string = "^....ftyphe[im][cs]", + position = 12, + weight = 60, + }, + { + -- Alternative signature for HEIC/HEIF + string = [[^....ftypmif1]], + position = 12, + weight = 60, + }, + } + }, } return patterns diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua index 3dce2e1f8..ad4ae4349 100644 --- a/lualib/lua_magic/types.lua +++ b/lualib/lua_magic/types.lua @@ -279,6 +279,11 @@ local types = { ct = 'image/bmp', av_check = false, }, + heic = { + type = 'image', + ct = 'image/heic', + av_check = false, + }, dwg = { type = 'image', ct = 'image/vnd.dwg', @@ -324,4 +329,4 @@ local types = { }, } -return types
\ No newline at end of file +return types diff --git a/lualib/lua_maps.lua b/lualib/lua_maps.lua index 2699ea214..c45b51b97 100644 --- a/lualib/lua_maps.lua +++ b/lualib/lua_maps.lua @@ -88,16 +88,64 @@ end local external_map_schema = ts.shape { external = ts.equivalent(true), -- must be true - backend = ts.string, -- where to get data, required - method = ts.one_of { "body", "header", "query" }, -- how to pass input + backend = ts.string:is_optional(), -- where to get data, required for HTTP + cdb = ts.string:is_optional(), -- path to CDB file, required for CDB + method = ts.one_of { "body", "header", "query" }:is_optional(), -- how to pass input encode = ts.one_of { "json", "messagepack" }:is_optional(), -- how to encode input (if relevant) timeout = (ts.number + ts.string / lua_util.parse_time_interval):is_optional(), } +-- Storage for CDB instances +local cdb_maps = {} +local cdb_finisher_set = false + local rspamd_http = require "rspamd_http" local ucl = require "ucl" +-- Function to handle CDB maps +local function handle_cdb_map(map_config, key, callback, task) + local rspamd_cdb = require "rspamd_cdb" + local hash_key = map_config.cdb + + -- Check if we need to open the CDB file + if not cdb_maps[hash_key] then + local cdb_file = map_config.cdb + -- Provide ev_base to monitor changes + local cdb_handle = rspamd_cdb.open(cdb_file, task:get_ev_base()) + + if not cdb_handle then + local err_msg = string.format("Failed to open CDB file: %s", cdb_file) + rspamd_logger.errx(task, err_msg) + if callback then + callback(false, err_msg, 500, task) + end + return nil + else + cdb_maps[hash_key] = cdb_handle + end + end + + -- Look up the key in CDB + local result = cdb_maps[hash_key]:find(key) + + if callback then + if result then + callback(true, result, 200, task) + else + callback(false, 'not found', 404, task) + end + return nil + end + + return result +end + local function query_external_map(map_config, upstreams, key, callback, task) + -- Check if this is a CDB map + if map_config.cdb then + return handle_cdb_map(map_config, key, callback, task) + end + -- Fallback to HTTP local http_method = (map_config.method == 'body' or map_config.method == 'form') and 'POST' or 'GET' local upstream = upstreams:get_upstream_round_robin() local http_headers = { @@ -138,7 +186,8 @@ local function query_external_map(map_config, upstreams, key, callback, task) local params_table = {} for k, v in pairs(key) do if type(v) == 'string' then - table.insert(params_table, string.format('%s=%s', lua_util.url_encode_string(k), lua_util.url_encode_string(v))) + table.insert(params_table, + string.format('%s=%s', lua_util.url_encode_string(k), lua_util.url_encode_string(v))) end end url = string.format('%s?%s', url, table.concat(params_table, '&')) @@ -305,7 +354,7 @@ local function rspamd_map_add_from_ucl(opt, mtype, description, callback) if string.find(opt[1], '^%d') then -- List of numeric stuff (hope it's ipnets definitions) - local map = rspamd_config:radix_from_ucl(opt) + local map = rspamd_config:radix_from_ucl(opt, description) if map then ret.__data = map @@ -448,17 +497,39 @@ local function rspamd_map_add_from_ucl(opt, mtype, description, callback) local parse_res, parse_err = external_map_schema(opt) if parse_res then - ret.__upstreams = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), opt.backend) - if ret.__upstreams then + if opt.cdb then ret.__data = opt ret.__external = true setmetatable(ret, ret_mt) maybe_register_selector() + if not cdb_finisher_set then + -- Register a finalize script to close all CDB handles when Rspamd stops + rspamd_config:register_finish_script(function() + for path, _ in pairs(cdb_maps) do + rspamd_logger.infox(rspamd_config, 'closing CDB map: %s', path) + cdb_maps[path] = nil + end + end) + cdb_finisher_set = true + end + return ret + elseif opt.backend then + ret.__upstreams = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), opt.backend) + if ret.__upstreams then + ret.__data = opt + ret.__external = true + setmetatable(ret, ret_mt) + maybe_register_selector() + + return ret + else + rspamd_logger.errx(rspamd_config, 'cannot parse external map upstreams: %s', + opt.backend) + end else - rspamd_logger.errx(rspamd_config, 'cannot parse external map upstreams: %s', - opt.backend) + rspamd_logger.errx(rspamd_config, 'external map requires either "cdb" or "backend" parameter') end else rspamd_logger.errx(rspamd_config, 'cannot parse external map: %s', @@ -526,15 +597,12 @@ local function rspamd_maybe_check_map(key, what) return rspamd_maybe_check_map(key, elt) end, what) end - if type(rspamd_maps) == "table" then - local mn - if starts(key, "map:") then - mn = string.sub(key, 5) - elseif starts(key, "map://") then - mn = string.sub(key, 7) + if type(rspamd_maps) == "table" and starts(key, "map:") then + local mn = string.sub(key, 5) + if starts(mn, "//") then + mn = string.sub(mn, 3) end - - if mn and rspamd_maps[mn] then + if rspamd_maps[mn] then return rspamd_maps[mn]:get_key(what) end end diff --git a/lualib/lua_maps_expressions.lua b/lualib/lua_maps_expressions.lua index 996de99c0..2ad9ad1d8 100644 --- a/lualib/lua_maps_expressions.lua +++ b/lualib/lua_maps_expressions.lua @@ -155,7 +155,7 @@ local function create(cfg, obj, module_name) end end local map = lua_maps.map_add_from_ucl(rule.map, rule.type, - obj.description or module_name) + rule.description or obj.description or module_name) if not map then rspamd_logger.errx(cfg, 'cannot add map for element %s in module %s', name, module_name) diff --git a/lualib/lua_mime.lua b/lualib/lua_mime.lua index fe221f599..c85f35066 100644 --- a/lualib/lua_mime.lua +++ b/lualib/lua_mime.lua @@ -158,13 +158,21 @@ exports.add_text_footer = function(task, html_footer, text_footer) local cur_boundary for _, part in ipairs(task:get_parts()) do local boundary = part:get_boundary() + local part_ct = part:get_header('Content-Type') + if part_ct then + part_ct = rspamd_util.parse_content_type(part_ct, task:get_mempool()) + end if part:is_multipart() then if cur_boundary then out[#out + 1] = string.format('--%s', - boundaries[#boundaries]) + boundaries[#boundaries].boundary) end - boundaries[#boundaries + 1] = boundary or '--XXX' + boundaries[#boundaries + 1] = { + boundary = boundary or '--XXX', + ct_type = part_ct.type or '', + ct_subtype = part_ct.subtype or '', + } cur_boundary = boundary local rh = part:get_raw_headers() @@ -176,7 +184,7 @@ exports.add_text_footer = function(task, html_footer, text_footer) if cur_boundary and boundary ~= cur_boundary then -- Need to close boundary out[#out + 1] = string.format('--%s--%s', - boundaries[#boundaries], newline_s) + boundaries[#boundaries].boundary, newline_s) table.remove(boundaries) cur_boundary = nil end @@ -218,7 +226,13 @@ exports.add_text_footer = function(task, html_footer, text_footer) if cur_boundary and boundary ~= cur_boundary then -- Need to close boundary out[#out + 1] = string.format('--%s--%s', - boundaries[#boundaries], newline_s) + boundaries[#boundaries].boundary, newline_s) + -- Need to close previous boundary, if ct_subtype is related + if #boundaries > 1 and boundaries[#boundaries].ct_type == "multipart" and boundaries[#boundaries].ct_subtype == "related" then + out[#out + 1] = string.format('--%s--%s', + boundaries[#boundaries -1].boundary, newline_s) + table.remove(boundaries) + end table.remove(boundaries) cur_boundary = boundary end @@ -239,7 +253,7 @@ exports.add_text_footer = function(task, html_footer, text_footer) -- Close remaining local b = table.remove(boundaries) while b do - out[#out + 1] = string.format('--%s--', b) + out[#out + 1] = string.format('--%s--', b.boundary) if #boundaries > 0 then out[#out + 1] = '' end diff --git a/lualib/lua_mime_types.lua b/lualib/lua_mime_types.lua index ba55f9740..7b6688b3c 100644 --- a/lualib/lua_mime_types.lua +++ b/lualib/lua_mime_types.lua @@ -214,7 +214,7 @@ exports.full_extensions_map = { { "hxw", "application/octet-stream" }, { "hxx", "text/plain" }, { "i", "text/plain" }, - { "ico", "image/x-icon" }, + { "ico", {"image/x-icon", "image/vnd.microsoft.icon"} }, { "ics", { "text/calendar", "application/ics", "application/octet-stream" } }, { "idl", "text/plain" }, { "ief", "image/ief" }, diff --git a/lualib/lua_redis.lua b/lualib/lua_redis.lua index 48ea1b6ed..195b7759f 100644 --- a/lualib/lua_redis.lua +++ b/lualib/lua_redis.lua @@ -26,7 +26,7 @@ local N = "lua_redis" local db_schema = (ts.number / tostring + ts.string):is_optional():describe("Database number") local common_schema = { - timeout = (ts.number + ts.string / lutil.parse_time_interval):is_optional():describe("Connection timeout"), + timeout = (ts.number + ts.string / lutil.parse_time_interval):is_optional():describe("Connection timeout (seconds)"), db = db_schema, database = db_schema, dbname = db_schema, @@ -40,6 +40,7 @@ local common_schema = { sentinel_master_maxerrors = (ts.number + ts.string / tonumber):is_optional():describe("Sentinel master max errors"), sentinel_username = ts.string:is_optional():describe("Sentinel username"), sentinel_password = ts.string:is_optional():describe("Sentinel password"), + redis_version = (ts.number + ts.string / tonumber):is_optional():describe("Redis server version (6 or 7)"), } local read_schema = lutil.table_merge({ @@ -357,6 +358,10 @@ local function process_redis_opts(options, redis_params) redis_params['prefix'] = options['prefix'] end + if options['redis_version'] and not redis_params['redis_version'] then + redis_params['redis_version'] = tonumber(options['redis_version']) + end + if type(options['expand_keys']) == 'boolean' then redis_params['expand_keys'] = options['expand_keys'] else @@ -1124,9 +1129,9 @@ local function redis_make_request_taskless(ev_base, cfg, redis_params, key, end --[[[ --- @function lua_redis.redis_make_request_taskless(ev_base, redis_params, key, is_write, callback, command, args) +-- @function lua_redis.redis_make_request_taskless(ev_base, cfg, redis_params, key, is_write, callback, command, args) -- Sends a request to Redis in context where `task` is not available for some specific use-cases --- Identical to redis_make_request() except in that first parameter is an `event base` object +-- Identical to redis_make_request() except in that first parameter is an `event base` object and the second one is the 'config' object --]] exports.rspamd_redis_make_request_taskless = redis_make_request_taskless @@ -1202,15 +1207,13 @@ local function prepare_redis_call(script) return options end -local function is_all_servers_ready(script) +local function is_any_server_ready(script) for _, s in ipairs(script.servers_ready) do - if s == "unsent" or s == "tempfail" then - return false + if s == "done" then + return true end end - - -- We assume that permanent errors are not recoverable, so we will just skip those servers - return true + return false end local function is_all_servers_failed(script) @@ -1264,7 +1267,7 @@ local function load_script_task(script, task, is_write) script.sha = data -- We assume that sha is the same on all servers script.servers_ready[idx] = "done" end - if is_all_servers_ready(script) then + if is_any_server_ready(script) then script_set_loaded(script) elseif is_all_servers_failed(script) then script.pending_upload = false @@ -1282,7 +1285,7 @@ local function load_script_task(script, task, is_write) end end - if is_all_servers_ready(script) then + if is_any_server_ready(script) then script_set_loaded(script) elseif is_all_servers_failed(script) then script.pending_upload = false @@ -1309,7 +1312,6 @@ local function load_script_taskless(script, cfg, ev_base, is_write) err, script.caller.short_src, script.caller.currentline) opt.upstream:fail() script.servers_ready[idx] = "failed" - return else -- Assume temporary error logger.infox(cfg, 'temporary error uploading script %s to %s: %s; registered from: %s:%s', @@ -1317,7 +1319,6 @@ local function load_script_taskless(script, cfg, ev_base, is_write) opt.upstream:get_addr():to_string(true), err, script.caller.short_src, script.caller.currentline) script.servers_ready[idx] = "tempfail" - return end else opt.upstream:ok() @@ -1330,7 +1331,7 @@ local function load_script_taskless(script, cfg, ev_base, is_write) script.servers_ready[idx] = "done" end - if is_all_servers_ready(script) then + if is_any_server_ready(script) then script_set_loaded(script) elseif is_all_servers_failed(script) then script.pending_upload = false @@ -1348,7 +1349,7 @@ local function load_script_taskless(script, cfg, ev_base, is_write) end end - if is_all_servers_ready(script) then + if is_any_server_ready(script) then script_set_loaded(script) elseif is_all_servers_failed(script) then script.pending_upload = false @@ -1477,6 +1478,10 @@ local function exec_redis_script(id, params, callback, keys, args) script.sha = nil script.loaded = nil script.pending_upload = true + -- We must initialize all servers as we don't know here which one failed + for i, _ in ipairs(script.servers_ready) do + script.servers_ready[i] = "unsent" + end -- Reload scripts if this has not been initiated yet if params.task then load_script_task(script, params.task) @@ -1510,15 +1515,20 @@ local function exec_redis_script(id, params, callback, keys, args) end end + local redis_command = 'EVALSHA' + if not params.is_write and script.redis_params.redis_version and + script.redis_params.redis_version >= 7 then + redis_command = 'EVALSHA_RO' + end if params.task then if not rspamd_redis_make_request(params.task, script.redis_params, - params.key, params.is_write, redis_cb, 'EVALSHA', redis_args) then + params.key, params.is_write, redis_cb, redis_command, redis_args) then callback('Cannot make redis request', nil) end else if not redis_make_request_taskless(params.ev_base, rspamd_config, script.redis_params, - params.key, params.is_write, redis_cb, 'EVALSHA', redis_args) then + params.key, params.is_write, redis_cb, redis_command, redis_args) then callback('Cannot make redis request', nil) end end @@ -1738,11 +1748,10 @@ exports.request = function(redis_params, attrs, req) opts.dbname = redis_params.db end - lutil.debugm(N, 'perform generic request to redis server' .. - ' (host=%s, timeout=%s): cmd: %s, arguments: %s', addr, - opts.timeout, opts.cmd, opts.args) - if opts.callback then + lutil.debugm(N, 'perform generic async request to redis server' .. + ' (host=%s, timeout=%s): cmd: %s, arguments: %s', addr, + opts.timeout, opts.cmd, opts.args) local ret, conn = rspamd_redis.make_request(opts) if not ret then logger.errx(log_obj, 'cannot execute redis request') @@ -1752,6 +1761,9 @@ exports.request = function(redis_params, attrs, req) return ret, conn, addr else -- Coroutines version + lutil.debugm(N, 'perform generic coroutine request to redis server' .. + ' (host=%s, timeout=%s): cmd: %s, arguments: %s', addr, + opts.timeout, opts.cmd, opts.args) local ret, conn = rspamd_redis.connect_sync(opts) if not ret then logger.errx(log_obj, 'cannot execute redis request') diff --git a/lualib/lua_scanners/cloudmark.lua b/lualib/lua_scanners/cloudmark.lua index ccb45b047..12a60abf1 100644 --- a/lualib/lua_scanners/cloudmark.lua +++ b/lualib/lua_scanners/cloudmark.lua @@ -173,53 +173,6 @@ local function cloudmark_config(opts) return nil end --- Converts a key-value map to the table representing multipart body, with the following values: --- `data`: data of the part --- `filename`: optional filename --- `content-type`: content type of the element (optional) --- `content-transfer-encoding`: optional CTE header -local function table_to_multipart_body(tbl, boundary) - local seen_data = false - local out = {} - - for k, v in pairs(tbl) do - if v.data then - seen_data = true - table.insert(out, string.format('--%s\r\n', boundary)) - if v.filename then - table.insert(out, - string.format('Content-Disposition: form-data; name="%s"; filename="%s"\r\n', - k, v.filename)) - else - table.insert(out, - string.format('Content-Disposition: form-data; name="%s"\r\n', k)) - end - if v['content-type'] then - table.insert(out, - string.format('Content-Type: %s\r\n', v['content-type'])) - else - table.insert(out, 'Content-Type: text/plain\r\n') - end - if v['content-transfer-encoding'] then - table.insert(out, - string.format('Content-Transfer-Encoding: %s\r\n', - v['content-transfer-encoding'])) - else - table.insert(out, 'Content-Transfer-Encoding: binary\r\n') - end - table.insert(out, '\r\n') - table.insert(out, v.data) - table.insert(out, '\r\n') - end - end - - if seen_data then - table.insert(out, string.format('--%s--\r\n', boundary)) - end - - return out -end - local function get_specific_symbol(scores_symbols, score) local selected local sel_thr = -1 @@ -263,7 +216,8 @@ local function parse_cloudmark_reply(task, rule, body) if obj.analysis then -- Report analysis string - rspamd_logger.infox(task, 'cloudmark report string: %s', obj.analysis) + local qid = task:get_queue_id() or 'unknown' + rspamd_logger.infox(task, 'qid: <%s>, cloudmark report string: %s', qid, obj.analysis) end local score = tonumber(obj.score) or 0 @@ -358,7 +312,7 @@ local function cloudmark_check(task, content, digest, rule, maybe_part) local request_data = { task = task, url = url, - body = table_to_multipart_body(request, static_boundary), + body = lua_util.table_to_multipart_body(request, static_boundary), headers = { ['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary) }, diff --git a/lualib/lua_scanners/icap.lua b/lualib/lua_scanners/icap.lua index 2e3ced034..532858793 100644 --- a/lualib/lua_scanners/icap.lua +++ b/lualib/lua_scanners/icap.lua @@ -239,13 +239,16 @@ local function icap_check(task, content, digest, rule, maybe_part) end end - local function get_req_headers() - + local function get_req_headers() local in_client_ip = task:get_from_ip() + local in_client_ip_str = in_client_ip:to_string() local req_hlen = 2 + if in_client_ip:get_version() == 6 then + in_client_ip_str = "ip6-" .. string.gsub(in_client_ip_str, ":", "-") + end if maybe_part then table.insert(req_headers, - string.format('GET http://%s/%s HTTP/1.0\r\n', in_client_ip, lua_util.url_encode_string(maybe_part:get_filename()))) + string.format('GET http://%s/%s HTTP/1.0\r\n', in_client_ip_str, lua_util.url_encode_string(maybe_part:get_filename()))) if rule.use_specific_content_type then table.insert(http_headers, string.format('Content-Type: %s/%s\r\n', maybe_part:get_detected_type())) --else diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua index 62b38c87e..636212b1f 100644 --- a/lualib/lua_util.lua +++ b/lualib/lua_util.lua @@ -1805,4 +1805,55 @@ exports.symbols_priorities = { low = 0, } +---[[[ +-- @function lua_util.table_to_multipart_body(tbl, boundary) +-- Converts a key-value map to the table representing multipart body, with the following values: +-- `data`: data of the part +-- `filename`: optional filename +-- `content-type`: content type of the element (optional) +-- `content-transfer-encoding`: optional CTE header +local function table_to_multipart_body(tbl, boundary) + local seen_data = false + local out = {} + + for k, v in pairs(tbl) do + if v.data then + seen_data = true + table.insert(out, string.format('--%s\r\n', boundary)) + if v.filename then + table.insert(out, + string.format('Content-Disposition: form-data; name="%s"; filename="%s"\r\n', + k, v.filename)) + else + table.insert(out, + string.format('Content-Disposition: form-data; name="%s"\r\n', k)) + end + if v['content-type'] then + table.insert(out, + string.format('Content-Type: %s\r\n', v['content-type'])) + else + table.insert(out, 'Content-Type: text/plain\r\n') + end + if v['content-transfer-encoding'] then + table.insert(out, + string.format('Content-Transfer-Encoding: %s\r\n', + v['content-transfer-encoding'])) + else + table.insert(out, 'Content-Transfer-Encoding: binary\r\n') + end + table.insert(out, '\r\n') + table.insert(out, v.data) + table.insert(out, '\r\n') + end + end + + if seen_data then + table.insert(out, string.format('--%s--\r\n', boundary)) + end + + return out +end + +exports.table_to_multipart_body = table_to_multipart_body + return exports diff --git a/lualib/plugins/neural.lua b/lualib/plugins/neural.lua index 6e88ef21c..545214669 100644 --- a/lualib/plugins/neural.lua +++ b/lualib/plugins/neural.lua @@ -757,7 +757,7 @@ local function process_rules_settings() type = 'set', }) lua_redis.register_prefix(selt.prefix .. '_\\d+_ham_set', N, - string.format('NN learning set (spam) for rule "%s"; settings id "%s"', + string.format('NN learning set (ham) for rule "%s"; settings id "%s"', rule.prefix, selt.name), { persistent = true, type = 'set', diff --git a/lualib/plugins/rbl.lua b/lualib/plugins/rbl.lua index af5d6bd91..074fc7f0c 100644 --- a/lualib/plugins/rbl.lua +++ b/lualib/plugins/rbl.lua @@ -32,6 +32,7 @@ local check_types = { content_urls = {}, numeric_urls = {}, emails = {}, + images = {}, replyto = {}, dkim = {}, rdns = { @@ -165,8 +166,6 @@ local function convert_checks(rule, name) end end - rule[check] = check_type - if not check_type.connfilter then all_connfilter = false end @@ -176,6 +175,8 @@ local function convert_checks(rule, name) name, check) return nil end + + rule[check] = true else rspamd_logger.infox(rspamd_config, 'disable check %s in %s: excluded explicitly', check, name) diff --git a/lualib/redis_scripts/bayes_cache_learn.lua b/lualib/redis_scripts/bayes_cache_learn.lua index d8a2d878e..a7c9ac443 100644 --- a/lualib/redis_scripts/bayes_cache_learn.lua +++ b/lualib/redis_scripts/bayes_cache_learn.lua @@ -1,12 +1,15 @@ --- Lua script to perform cache checking for bayes classification +-- Lua script to perform cache checking for bayes classification (multi-class) -- This script accepts the following parameters: -- key1 - cache id --- key3 - is spam (1 or 0) +-- key2 - class_id (numeric hash of class name, computed by C side) -- key3 - configuration table in message pack local cache_id = KEYS[1] -local is_spam = KEYS[2] +local class_id = KEYS[2] local conf = cmsgpack.unpack(KEYS[3]) + +-- Use class_id directly as cache value +local cache_value = tostring(class_id) cache_id = string.sub(cache_id, 1, conf.cache_elt_len) -- Try each prefix that is in Redis (as some other instance might have set it) @@ -15,8 +18,8 @@ for i = 0, conf.cache_max_keys do local have = redis.call('HGET', prefix, cache_id) if have then - -- Already in cache, but is_spam changes when relearning - redis.call('HSET', prefix, cache_id, is_spam) + -- Already in cache, but cache_value changes when relearning + redis.call('HSET', prefix, cache_id, cache_value) return false end end @@ -30,7 +33,7 @@ for i = 0, conf.cache_max_keys do if count < lim then -- We can add it to this prefix - redis.call('HSET', prefix, cache_id, is_spam) + redis.call('HSET', prefix, cache_id, cache_value) added = true end end @@ -46,7 +49,7 @@ if not added then if exists then if not expired then redis.call('DEL', prefix) - redis.call('HSET', prefix, cache_id, is_spam) + redis.call('HSET', prefix, cache_id, cache_value) -- Do not expire anything else expired = true diff --git a/lualib/redis_scripts/bayes_classify.lua b/lualib/redis_scripts/bayes_classify.lua index e94f645fd..d6132e631 100644 --- a/lualib/redis_scripts/bayes_classify.lua +++ b/lualib/redis_scripts/bayes_classify.lua @@ -1,37 +1,68 @@ --- Lua script to perform bayes classification +-- Lua script to perform bayes classification (multi-class) -- This script accepts the following parameters: -- key1 - prefix for bayes tokens (e.g. for per-user classification) --- key2 - set of tokens encoded in messagepack array of strings +-- key2 - class labels: table of all class labels as "TABLE:label1,label2,..." +-- key3 - set of tokens encoded in messagepack array of strings local prefix = KEYS[1] -local output_spam = {} -local output_ham = {} +local class_labels_arg = KEYS[2] +local input_tokens = cmsgpack.unpack(KEYS[3]) -local learned_ham = tonumber(redis.call('HGET', prefix, 'learns_ham')) or 0 -local learned_spam = tonumber(redis.call('HGET', prefix, 'learns_spam')) or 0 +-- Parse class labels (always expect TABLE: format) +local class_labels = {} +if string.match(class_labels_arg, "^TABLE:") then + local labels_str = string.sub(class_labels_arg, 7) -- Remove "TABLE:" prefix + for label in string.gmatch(labels_str, "([^,]+)") do + table.insert(class_labels, label) + end +else + -- Legacy single class - convert to array + class_labels = { class_labels_arg } +end --- Output is a set of pairs (token_index, token_count), tokens that are not --- found are not filled. --- This optimisation will save a lot of space for sparse tokens, and in Bayes that assumption is normally held +-- Get learned counts for all classes (ordered) +local learned_counts = {} +for _, label in ipairs(class_labels) do + local key = 'learns_' .. string.lower(label) + -- Handle legacy keys for backward compatibility + if label == 'H' then + key = 'learns_ham' + elseif label == 'S' then + key = 'learns_spam' + end + table.insert(learned_counts, tonumber(redis.call('HGET', prefix, key)) or 0) +end -if learned_ham > 0 and learned_spam > 0 then - local input_tokens = cmsgpack.unpack(KEYS[2]) - for i, token in ipairs(input_tokens) do - local token_data = redis.call('HMGET', token, 'H', 'S') +-- Get token data for all classes (ordered) +local token_results = {} +for i, _ in ipairs(class_labels) do + token_results[i] = {} +end - if token_data then - local ham_count = token_data[1] - local spam_count = token_data[2] +-- Check if we have any learning data +local has_learns = false +for _, count in ipairs(learned_counts) do + if count > 0 then + has_learns = true + break + end +end - if ham_count then - table.insert(output_ham, { i, tonumber(ham_count) }) - end +if has_learns then + -- Process each token + for i, token in ipairs(input_tokens) do + local token_data = redis.call('HMGET', token, unpack(class_labels)) - if spam_count then - table.insert(output_spam, { i, tonumber(spam_count) }) + if token_data then + for j, _ in ipairs(class_labels) do + local count = token_data[j] + if count and tonumber(count) > 0 then + table.insert(token_results[j], { i, tonumber(count) }) + end end end end end -return { learned_ham, learned_spam, output_ham, output_spam }
\ No newline at end of file +-- Always return ordered arrays: [learned_counts_array, token_results_array] +return { learned_counts, token_results } diff --git a/lualib/redis_scripts/bayes_learn.lua b/lualib/redis_scripts/bayes_learn.lua index 5456165b6..ebc798fe0 100644 --- a/lualib/redis_scripts/bayes_learn.lua +++ b/lualib/redis_scripts/bayes_learn.lua @@ -1,14 +1,14 @@ --- Lua script to perform bayes learning +-- Lua script to perform bayes learning (multi-class) -- This script accepts the following parameters: -- key1 - prefix for bayes tokens (e.g. for per-user classification) --- key2 - boolean is_spam +-- key2 - class label string (e.g. "S", "H", "T") -- key3 - string symbol -- key4 - boolean is_unlearn -- key5 - set of tokens encoded in messagepack array of strings -- key6 - set of text tokens (if any) encoded in messagepack array of strings (size must be twice of `KEYS[5]`) local prefix = KEYS[1] -local is_spam = KEYS[2] == 'true' and true or false +local class_label = KEYS[2] local symbol = KEYS[3] local is_unlearn = KEYS[4] == 'true' and true or false local input_tokens = cmsgpack.unpack(KEYS[5]) @@ -18,15 +18,47 @@ if KEYS[6] then text_tokens = cmsgpack.unpack(KEYS[6]) end -local hash_key = is_spam and 'S' or 'H' -local learned_key = is_spam and 'learns_spam' or 'learns_ham' +-- Handle backward compatibility for boolean values +if class_label == 'true' then + class_label = 'S' -- spam +elseif class_label == 'false' then + class_label = 'H' -- ham +end + +local hash_key = class_label +local learned_key = 'learns_' .. string.lower(class_label) + +-- Handle legacy keys for backward compatibility +if class_label == 'S' then + learned_key = 'learns_spam' +elseif class_label == 'H' then + learned_key = 'learns_ham' +end redis.call('SADD', symbol .. '_keys', prefix) redis.call('HSET', prefix, 'version', '2') -- new schema -redis.call('HINCRBY', prefix, learned_key, is_unlearn and -1 or 1) -- increase or decrease learned count + +-- Update learned count, but prevent it from going negative +if is_unlearn then + local current_count = tonumber(redis.call('HGET', prefix, learned_key)) or 0 + if current_count > 0 then + redis.call('HINCRBY', prefix, learned_key, -1) + end +else + redis.call('HINCRBY', prefix, learned_key, 1) +end for i, token in ipairs(input_tokens) do - redis.call('HINCRBY', token, hash_key, is_unlearn and -1 or 1) + -- Update token count, but prevent it from going negative + if is_unlearn then + local current_token_count = tonumber(redis.call('HGET', token, hash_key)) or 0 + if current_token_count > 0 then + redis.call('HINCRBY', token, hash_key, -1) + end + else + redis.call('HINCRBY', token, hash_key, 1) + end + if text_tokens then local tok1 = text_tokens[i * 2 - 1] local tok2 = text_tokens[i * 2] @@ -38,7 +70,14 @@ for i, token in ipairs(input_tokens) do redis.call('HSET', token, 'tokens', tok1) end - redis.call('ZINCRBY', prefix .. '_z', is_unlearn and -1 or 1, token) + if is_unlearn then + local current_z_score = tonumber(redis.call('ZSCORE', prefix .. '_z', token)) or 0 + if current_z_score > 0 then + redis.call('ZINCRBY', prefix .. '_z', -1, token) + end + else + redis.call('ZINCRBY', prefix .. '_z', 1, token) + end end end end diff --git a/lualib/redis_scripts/neural_save_unlock.lua b/lualib/redis_scripts/neural_save_unlock.lua index 5af1ddcde..7ea7dc2e5 100644 --- a/lualib/redis_scripts/neural_save_unlock.lua +++ b/lualib/redis_scripts/neural_save_unlock.lua @@ -12,13 +12,14 @@ local now = tonumber(KEYS[6]) redis.call('ZADD', KEYS[2], now, KEYS[4]) redis.call('HSET', KEYS[1], 'ann', KEYS[3]) -redis.call('DEL', KEYS[1] .. '_spam_set') -redis.call('DEL', KEYS[1] .. '_ham_set') -redis.call('HDEL', KEYS[1], 'lock') -redis.call('HDEL', KEYS[7], 'lock') -redis.call('EXPIRE', KEYS[1], tonumber(KEYS[5])) redis.call('HSET', KEYS[1], 'roc_thresholds', KEYS[8]) if KEYS[9] then redis.call('HSET', KEYS[1], 'pca', KEYS[9]) end -return 1
\ No newline at end of file +redis.call('HDEL', KEYS[1], 'lock') +redis.call('HDEL', KEYS[7], 'lock') +redis.call('EXPIRE', KEYS[1], tonumber(KEYS[5])) + -- expire in 10m, to not face race condition with other rspamd replicas refill deleted keys +redis.call('EXPIRE', KEYS[7] .. '_spam_set', 600) +redis.call('EXPIRE', KEYS[7] .. '_ham_set', 600) +return 1 diff --git a/lualib/rspamadm/dmarc_report.lua b/lualib/rspamadm/dmarc_report.lua index 71ff5d163..fb28a9264 100644 --- a/lualib/rspamadm/dmarc_report.lua +++ b/lualib/rspamadm/dmarc_report.lua @@ -99,6 +99,8 @@ local redis_attrs = { log_obj = rspamd_config, resolver = rspamadm_dns_resolver, } +local redis_attrs_write = lua_util.shallowcopy(redis_attrs) +redis_attrs_write['is_write'] = true local pool local function load_config(opts) @@ -481,7 +483,7 @@ local function prepare_report(opts, start_time, end_time, rep_key) -- Rename report key to avoid races if not opts.no_opt then - lua_redis.request(redis_params, redis_attrs, + lua_redis.request(redis_params, redis_attrs_write, { 'RENAME', rep_key, rep_key .. '_processing' }) rep_key = rep_key .. '_processing' end @@ -491,7 +493,7 @@ local function prepare_report(opts, start_time, end_time, rep_key) if not dmarc_record then if not opts.no_opt then - lua_redis.request(redis_params, redis_attrs, + lua_redis.request(redis_params, redis_attrs_write, { 'DEL', rep_key }) end logger.messagex('Cannot process reports for domain %s; invalid dmarc record', reporting_domain) @@ -554,7 +556,7 @@ local function prepare_report(opts, start_time, end_time, rep_key) lua_util.debugm(N, 'got final message: %s', message) if not opts.no_opt then - lua_redis.request(redis_params, redis_attrs, + lua_redis.request(redis_params, redis_attrs_write, { 'DEL', rep_key }) end @@ -585,7 +587,7 @@ local function process_report_date(opts, start_time, end_time, date) -- Rename index key to avoid races if not opts.no_opt then - lua_redis.request(redis_params, redis_attrs, + lua_redis.request(redis_params, redis_attrs_write, { 'RENAME', idx_key, idx_key .. '_processing' }) idx_key = idx_key .. '_processing' end @@ -595,7 +597,7 @@ local function process_report_date(opts, start_time, end_time, date) if not ret or not results then -- Remove bad key if not opts.no_opt then - lua_redis.request(redis_params, redis_attrs, + lua_redis.request(redis_params, redis_attrs_write, { 'DEL', idx_key }) end logger.messagex('Cannot get reports for %s', date) @@ -615,7 +617,7 @@ local function process_report_date(opts, start_time, end_time, date) lua_util.shuffle(reports) -- Remove processed key if not opts.no_opt then - lua_redis.request(redis_params, redis_attrs, + lua_redis.request(redis_params, redis_attrs_write, { 'DEL', idx_key }) end @@ -715,11 +717,11 @@ local function handler(args) if not opts.no_opt then lua_util.debugm(N, 'set last report date to %s', start_collection) -- Hack to avoid coroutines + async functions mess: we use async redis call here - redis_attrs.callback = function() + redis_attrs_write.callback = function() logger.messagex('Reporting collection has finished %s dates processed, %s reports: %s completed, %s failed', ndates, nreports, nsuccess, nfail) end - lua_redis.request(redis_params, redis_attrs, + lua_redis.request(redis_params, redis_attrs_write, { 'SETEX', 'rspamd_dmarc_last_collection', dmarc_settings.reporting.keys_expire * 2, tostring(start_collection) }) else diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua index e0b23e16c..a20e47e23 100644 --- a/lualib/rspamadm/mime.lua +++ b/lualib/rspamadm/mime.lua @@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -]]-- +]] -- local argparse = require "argparse" local ansicolors = require "ansicolors" @@ -35,94 +35,94 @@ local parser = argparse() :require_command(true) parser:option "-c --config" - :description "Path to config file" - :argname("<cfg>") - :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf") + :description "Path to config file" + :argname("<cfg>") + :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf") parser:mutex( - parser:flag "-j --json" - :description "JSON output", - parser:flag "-U --ucl" - :description "UCL output", - parser:flag "-M --messagepack" - :description "MessagePack output" + parser:flag "-j --json" + :description "JSON output", + parser:flag "-U --ucl" + :description "UCL output", + parser:flag "-M --messagepack" + :description "MessagePack output" ) parser:flag "-C --compact" - :description "Use compact format" + :description "Use compact format" parser:flag "--no-file" - :description "Do not print filename" + :description "Do not print filename" -- Extract subcommand local extract = parser:command "extract ex e" - :description "Extracts data from MIME messages" + :description "Extracts data from MIME messages" extract:argument "file" - :description "File to process" - :argname "<file>" - :args "+" + :description "File to process" + :argname "<file>" + :args "+" extract:flag "-t --text" - :description "Extracts plain text data from a message" + :description "Extracts plain text data from a message" extract:flag "-H --html" - :description "Extracts htm data from a message" + :description "Extracts htm data from a message" extract:option "-o --output" - :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')" - :argname("<type>") - :convert { - raw = "raw", - content = "content", - oneline = "content_oneline", - decoded = "raw_parsed", - decoded_utf = "raw_utf" -} - :default "content" + :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')" + :argname("<type>") + :convert { + raw = "raw", + content = "content", + oneline = "content_oneline", + decoded = "raw_parsed", + decoded_utf = "raw_utf" + } + :default "content" extract:flag "-w --words" - :description "Extracts words" + :description "Extracts words" extract:flag "-p --part" - :description "Show part info" + :description "Show part info" extract:flag "-s --structure" - :description "Show structure info (e.g. HTML tags)" + :description "Show structure info (e.g. HTML tags)" extract:flag "-i --invisible" - :description "Show invisible content for HTML parts" + :description "Show invisible content for HTML parts" extract:option "-F --words-format" - :description "Words format ('stem', 'norm', 'raw', 'full')" - :argname("<type>") - :convert { - stem = "stem", - norm = "norm", - raw = "raw", - full = "full", -} - :default "stem" + :description "Words format ('stem', 'norm', 'raw', 'full')" + :argname("<type>") + :convert { + stem = "stem", + norm = "norm", + raw = "raw", + full = "full", + } + :default "stem" local stat = parser:command "stat st s" - :description "Extracts statistical data from MIME messages" + :description "Extracts statistical data from MIME messages" stat:argument "file" :description "File to process" :argname "<file>" :args "+" stat:mutex( - stat:flag "-m --meta" - :description "Lua metatokens", - stat:flag "-b --bayes" - :description "Bayes tokens", - stat:flag "-F --fuzzy" - :description "Fuzzy hashes" + stat:flag "-m --meta" + :description "Lua metatokens", + stat:flag "-b --bayes" + :description "Bayes tokens", + stat:flag "-F --fuzzy" + :description "Fuzzy hashes" ) stat:flag "-s --shingles" :description "Show shingles for fuzzy hashes" local urls = parser:command "urls url u" - :description "Extracts URLs from MIME messages" + :description "Extracts URLs from MIME messages" urls:argument "file" :description "File to process" :argname "<file>" :args "+" urls:mutex( - urls:flag "-t --tld" - :description "Get TLDs only", - urls:flag "-H --host" - :description "Get hosts only", - urls:flag "-f --full" - :description "Show piecewise urls as processed by Rspamd" + urls:flag "-t --tld" + :description "Get TLDs only", + urls:flag "-H --host" + :description "Get hosts only", + urls:flag "-f --full" + :description "Show piecewise urls as processed by Rspamd" ) urls:flag "-u --unique" @@ -135,75 +135,75 @@ urls:flag "-r --reverse" :description "Reverse sort order" local modify = parser:command "modify mod m" - :description "Modifies MIME message" + :description "Modifies MIME message" modify:argument "file" - :description "File to process" - :argname "<file>" - :args "+" + :description "File to process" + :argname "<file>" + :args "+" modify:option "-a --add-header" - :description "Adds specific header" - :argname "<header=value>" - :count "*" + :description "Adds specific header" + :argname "<header=value>" + :count "*" modify:option "-r --remove-header" - :description "Removes specific header (all occurrences)" - :argname "<header>" - :count "*" + :description "Removes specific header (all occurrences)" + :argname "<header>" + :count "*" modify:option "-R --rewrite-header" - :description "Rewrites specific header, uses Lua string.format pattern" - :argname "<header=pattern>" - :count "*" + :description "Rewrites specific header, uses Lua string.format pattern" + :argname "<header=pattern>" + :count "*" modify:option "-t --text-footer" - :description "Adds footer to text/plain parts from a specific file" - :argname "<file>" + :description "Adds footer to text/plain parts from a specific file" + :argname "<file>" modify:option "-H --html-footer" - :description "Adds footer to text/html parts from a specific file" - :argname "<file>" + :description "Adds footer to text/html parts from a specific file" + :argname "<file>" local strip = parser:command "strip" - :description "Strip attachments from a message" + :description "Strip attachments from a message" strip:argument "file" - :description "File to process" - :argname "<file>" - :args "+" + :description "File to process" + :argname "<file>" + :args "+" strip:flag "-i --keep-images" - :description "Keep images" + :description "Keep images" strip:option "--min-text-size" - :description "Minimal text size to keep" - :argname "<size>" - :convert(tonumber) - :default(0) + :description "Minimal text size to keep" + :argname "<size>" + :convert(tonumber) + :default(0) strip:option "--max-text-size" - :description "Max text size to keep" - :argname "<size>" - :convert(tonumber) - :default(math.huge) + :description "Max text size to keep" + :argname "<size>" + :convert(tonumber) + :default(math.huge) local anonymize = parser:command "anonymize" - :description "Try to remove sensitive information from a message" + :description "Try to remove sensitive information from a message" anonymize:argument "file" - :description "File to process" - :argname "<file>" - :args "+" + :description "File to process" + :argname "<file>" + :args "+" anonymize:option "--exclude-header -X" - :description "Exclude specific headers from anonymization" - :argname "<header>" - :count "*" + :description "Exclude specific headers from anonymization" + :argname "<header>" + :count "*" anonymize:option "--include-header -I" - :description "Include specific headers from anonymization" - :argname "<header>" - :count "*" + :description "Include specific headers from anonymization" + :argname "<header>" + :count "*" anonymize:flag "--gpt" - :description "Use LLM model for anonymization (requires GPT plugin to be configured)" + :description "Use LLM model for anonymization (requires GPT plugin to be configured)" anonymize:option "--model" - :description "Model to use for anonymization" - :argname "<model>" + :description "Model to use for anonymization" + :argname "<model>" anonymize:option "--prompt" - :description "Prompt to use for anonymization" - :argname "<prompt>" + :description "Prompt to use for anonymization" + :argname "<prompt>" local sign = parser:command "sign" - :description "Performs DKIM signing" + :description "Performs DKIM signing" sign:argument "file" :description "File to process" :argname "<file>" @@ -225,33 +225,33 @@ sign:option "-t --type" :description "ARC or DKIM signing" :argname("<arc|dkim>") :convert { - ['arc'] = 'arc', - ['dkim'] = 'dkim', -} + ['arc'] = 'arc', + ['dkim'] = 'dkim', + } :default 'dkim' sign:option "-o --output" :description "Output format" :argname("<message|signature>") :convert { - ['message'] = 'message', - ['signature'] = 'signature', -} + ['message'] = 'message', + ['signature'] = 'signature', + } :default 'message' local dump = parser:command "dump" - :description "Dumps a raw message in different formats" + :description "Dumps a raw message in different formats" dump:argument "file" :description "File to process" :argname "<file>" :args "+" -- Duplicate format for convenience dump:mutex( - parser:flag "-j --json" - :description "JSON output", - parser:flag "-U --ucl" - :description "UCL output", - parser:flag "-M --messagepack" - :description "MessagePack output" + parser:flag "-j --json" + :description "JSON output", + parser:flag "-U --ucl" + :description "UCL output", + parser:flag "-M --messagepack" + :description "MessagePack output" ) dump:flag "-s --split" :description "Split the output file contents such that no content is embedded" @@ -260,7 +260,7 @@ dump:option "-o --outdir" :description "Output directory" :argname("<directory>") -local function load_config(opts) +local function load_config(opts, load_tokenizers) local _r, err = rspamd_config:load_ucl(opts['config']) if not _r then @@ -273,6 +273,23 @@ local function load_config(opts) rspamd_logger.errx('cannot process %s: %s', opts['config'], err) os.exit(1) end + + -- Load custom tokenizers if requested + if load_tokenizers then + local success, tokenizer_err = rspamd_config:load_custom_tokenizers() + if not success then + rspamd_logger.errx('cannot load custom tokenizers: %s', tokenizer_err or 'unknown error') + -- Don't exit here as custom tokenizers are optional + rspamd_logger.warnx('proceeding without custom tokenizers') + end + end +end + +-- Helper function to ensure proper cleanup of tokenizers +local function cleanup_tokenizers() + if rspamd_config then + rspamd_config:unload_custom_tokenizers() + end end local function load_task(_, fname) @@ -288,13 +305,13 @@ local function load_task(_, fname) if not res then parser:error(string.format('cannot read message from %s: %s', fname, - task)) + task)) return nil end if not task:process_message() then parser:error(string.format('cannot read message from %s: %s', fname, - 'failed to parse')) + 'failed to parse')) return nil end @@ -335,7 +352,6 @@ local function print_elts(elts, opts, func) io.write(ucl.to_format(elts, output_fmt(opts))) else fun.each(function(fname, elt) - if not opts.json and not opts.ucl then if func then elt = fun.map(func, elt) @@ -357,7 +373,7 @@ local function extract_handler(opts) if opts.words then -- Enable stemming and urls detection - load_config(opts) + load_config(opts, true) -- Load with custom tokenizers rspamd_url.init(rspamd_config:get_tld_path()) rspamd_config:init_subsystem('langdet') end @@ -372,39 +388,38 @@ local function extract_handler(opts) if not opts.json and not opts.ucl then table.insert(out, - rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s', - part:get_mimepart():get_digest():sub(1, 8), - t, - part:get_language(), - part:get_length(), part:get_raw_length(), - part:get_words_count())) + rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s', + part:get_mimepart():get_digest():sub(1, 8), + t, + part:get_language(), + part:get_length(), part:get_raw_length(), + part:get_words_count())) table.insert(out, - rspamd_logger.slog('Stats: %s', - fun.foldl(function(acc, k, v) - if acc ~= '' then - return string.format('%s, %s:%s', acc, k, v) - else - return string.format('%s:%s', k, v) - end - end, '', part:get_stats()))) + rspamd_logger.slog('Stats: %s', + fun.foldl(function(acc, k, v) + if acc ~= '' then + return string.format('%s, %s:%s', acc, k, v) + else + return string.format('%s:%s', k, v) + end + end, '', part:get_stats()))) end end end local function maybe_print_mime_part_info(part, out) if opts.part then - if not opts.json and not opts.ucl then local mtype, msubtype = part:get_type() local det_mtype, det_msubtype = part:get_detected_type() table.insert(out, - rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s', - part:get_digest():sub(1, 8), - mtype, msubtype, - det_mtype, det_msubtype, - part:get_filename(), - part:get_detected_ext(), - part:get_length())) + rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s', + part:get_digest():sub(1, 8), + mtype, msubtype, + det_mtype, det_msubtype, + part:get_filename(), + part:get_detected_ext(), + part:get_length())) end end end @@ -416,17 +431,17 @@ local function extract_handler(opts) return table.concat(words, ' ') else return table.concat( - fun.totable( - fun.map(function(w) - -- [1] - stemmed word - -- [2] - normalised word - -- [3] - raw word - -- [4] - flags (table of strings) - return string.format('%s|%s|%s(%s)', - w[3], w[2], w[1], table.concat(w[4], ',')) - end, words) - ), - ' ' + fun.totable( + fun.map(function(w) + -- [1] - stemmed word + -- [2] - normalised word + -- [3] - raw word + -- [4] - flags (table of strings) + return string.format('%s|%s|%s(%s)', + w[3], w[2], w[1], table.concat(w[4], ',')) + end, words) + ), + ' ' ) end end @@ -443,7 +458,7 @@ local function extract_handler(opts) if opts.words then local how_words = opts['words_format'] or 'stem' table.insert(out_elts[fname], 'meta_words: ' .. - print_words(task:get_meta_words(how_words), how_words == 'full')) + print_words(task:get_meta_words(how_words), how_words == 'full')) end if opts.text or opts.html then @@ -466,7 +481,7 @@ local function extract_handler(opts) if opts.words then local how_words = opts['words_format'] or 'stem' table.insert(out_elts[fname], print_words(part:get_words(how_words), - how_words == 'full')) + how_words == 'full')) else table.insert(out_elts[fname], tostring(part:get_content(how))) end @@ -480,7 +495,7 @@ local function extract_handler(opts) if opts.words then local how_words = opts['words_format'] or 'stem' table.insert(out_elts[fname], print_words(part:get_words(how_words), - how_words == 'full')) + how_words == 'full')) else if opts.structure then local hc = part:get_html() @@ -489,11 +504,11 @@ local function extract_handler(opts) local fun = require "fun" if type(elt) == 'table' then return table.concat(fun.totable( - fun.map( - function(t) - return rspamd_logger.slog("%s", t) - end, - elt)), '\n') + fun.map( + function(t) + return rspamd_logger.slog("%s", t) + end, + elt)), '\n') else return rspamd_logger.slog("%s", elt) end @@ -524,7 +539,7 @@ local function extract_handler(opts) if opts.invisible then local hc = part:get_html() table.insert(out_elts[fname], string.format('invisible content: %s', - tostring(hc:get_invisible()))) + tostring(hc:get_invisible()))) end end end @@ -544,13 +559,18 @@ local function extract_handler(opts) for _, task in ipairs(tasks) do task:destroy() end + + -- Cleanup custom tokenizers if they were loaded + if opts.words then + cleanup_tokenizers() + end end local function stat_handler(opts) local fun = require "fun" local out_elts = {} - load_config(opts) + load_config(opts, true) -- Load with custom tokenizers for stat generation rspamd_url.init(rspamd_config:get_tld_path()) rspamd_config:init_subsystem('langdet,stat') -- Needed to gen stat tokens @@ -571,10 +591,10 @@ local function stat_handler(opts) out_elts[fname] = bt process_func = function(e) return string.format('%s (%d): "%s"+"%s", [%s]', e.data, e.win, e.t1 or "", - e.t2 or "", table.concat(fun.totable( - fun.map(function(k) - return k - end, e.flags)), ",")) + e.t2 or "", table.concat(fun.totable( + fun.map(function(k) + return k + end, e.flags)), ",")) end elseif opts.fuzzy then local parts = task:get_parts() or {} @@ -601,16 +621,16 @@ local function stat_handler(opts) digest = digest, shingles = shingles, type = string.format('%s/%s', - ({ part:get_type() })[1], - ({ part:get_type() })[2]) + ({ part:get_type() })[1], + ({ part:get_type() })[2]) }) else table.insert(out_elts[fname], { digest = part:get_digest(), file = part:get_filename(), type = string.format('%s/%s', - ({ part:get_type() })[1], - ({ part:get_type() })[2]) + ({ part:get_type() })[1], + ({ part:get_type() })[2]) }) end end @@ -621,10 +641,13 @@ local function stat_handler(opts) end print_elts(out_elts, opts, process_func) + + -- Cleanup custom tokenizers + cleanup_tokenizers() end local function urls_handler(opts) - load_config(opts) + load_config(opts, false) -- URLs don't need custom tokenizers rspamd_url.init(rspamd_config:get_tld_path()) local out_elts = {} @@ -764,7 +787,7 @@ local function newline(task) end local function modify_handler(opts) - load_config(opts) + load_config(opts, false) -- Modification doesn't need custom tokenizers rspamd_url.init(rspamd_config:get_tld_path()) local function read_file(file) @@ -804,10 +827,10 @@ local function modify_handler(opts) if hname == name then local new_value = string.format(hpattern, hdr.decoded) new_value = string.format('%s:%s%s', - name, hdr.separator, - rspamd_util.fold_header(name, - rspamd_util.mime_header_encode(new_value), - task:get_newlines_type())) + name, hdr.separator, + rspamd_util.fold_header(name, + rspamd_util.mime_header_encode(new_value), + task:get_newlines_type())) out[#out + 1] = new_value return end @@ -816,12 +839,12 @@ local function modify_handler(opts) if rewrite.need_rewrite_ct then if name:lower() == 'content-type' then local nct = string.format('%s: %s/%s; charset=utf-8', - 'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype) + 'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype) out[#out + 1] = nct return elseif name:lower() == 'content-transfer-encoding' then out[#out + 1] = string.format('%s: %s', - 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable') + 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable') seen_cte = true return end @@ -837,13 +860,13 @@ local function modify_handler(opts) if hname and hvalue then out[#out + 1] = string.format('%s: %s', hname, - rspamd_util.fold_header(hname, hvalue, task:get_newlines_type())) + rspamd_util.fold_header(hname, hvalue, task:get_newlines_type())) end end if not seen_cte and rewrite.need_rewrite_ct then out[#out + 1] = string.format('%s: %s', - 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable') + 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable') end -- End of headers @@ -883,7 +906,7 @@ local function modify_handler(opts) end local function sign_handler(opts) - load_config(opts) + load_config(opts, false) -- Signing doesn't need custom tokenizers rspamd_url.init(rspamd_config:get_tld_path()) local lua_dkim = require("lua_ffi").dkim @@ -927,11 +950,11 @@ local function sign_handler(opts) io.flush() else local dkim_hdr = string.format('%s: %s%s', - 'DKIM-Signature', - rspamd_util.fold_header('DKIM-Signature', - rspamd_util.mime_header_encode(sig), - task:get_newlines_type()), - newline(task)) + 'DKIM-Signature', + rspamd_util.fold_header('DKIM-Signature', + rspamd_util.mime_header_encode(sig), + task:get_newlines_type()), + newline(task)) io.write(dkim_hdr) io.flush() task:get_content():save_in_file(1) @@ -942,7 +965,7 @@ local function sign_handler(opts) end local function strip_handler(opts) - load_config(opts) + load_config(opts, false) -- Stripping doesn't need custom tokenizers rspamd_url.init(rspamd_config:get_tld_path()) for _, fname in ipairs(opts.file) do @@ -998,7 +1021,7 @@ local function strip_handler(opts) end local function anonymize_handler(opts) - load_config(opts) + load_config(opts, false) -- Anonymization doesn't need custom tokenizers rspamd_url.init(rspamd_config:get_tld_path()) for _, fname in ipairs(opts.file) do @@ -1103,7 +1126,7 @@ local function get_dump_content(task, opts, fname) end local function dump_handler(opts) - load_config(opts) + load_config(opts, false) -- Dumping doesn't need custom tokenizers rspamd_url.init(rspamd_config:get_tld_path()) for _, fname in ipairs(opts.file) do diff --git a/lualib/rspamadm/statistics_dump.lua b/lualib/rspamadm/statistics_dump.lua index 6bc045850..6a08d11fd 100644 --- a/lualib/rspamadm/statistics_dump.lua +++ b/lualib/rspamadm/statistics_dump.lua @@ -42,6 +42,12 @@ parser:option "-c --config" :argname("<cfg>") :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf") +parser:option "-b --batch-size" + :description "Number of entries to process at once" + :argname("<elts>") + :convert(tonumber) + :default(1000) + -- Extract subcommand local dump = parser:command "dump d" :description "Dump bayes statistics" @@ -54,7 +60,7 @@ dump:mutex( dump:flag "-c --compress" :description "Compress output" dump:option "-b --batch-size" - :description "Number of entires to process at once" + :description "Number of entries to process at once" :argname("<elts>") :convert(tonumber) :default(1000) @@ -68,12 +74,12 @@ restore:argument "file" :argname "<file>" :args "*" restore:option "-b --batch-size" - :description "Number of entires to process at once" + :description "Number of entries to process at once" :argname("<elts>") :convert(tonumber) :default(1000) restore:option "-m --mode" - :description "Number of entires to process at once" + :description "Number of entries to process at once" :argname("<append|subtract|replace>") :convert { ['append'] = 'append', @@ -287,11 +293,11 @@ local function dump_pattern(conn, pattern, opts, out, key) -- Do not write the last chunk of out as it will be processed afterwards if cursor ~= 0 then if opts.cdb then - dump_out(out, opts, false) - clear_fcn(out) - else dump_cdb(out, opts, false, key) out[key].elts = {} + else + dump_out(out, opts, false) + clear_fcn(out) end elseif opts.cdb then dump_cdb(out, opts, true, key) @@ -541,4 +547,4 @@ return { aliases = { 'stat_dump', 'bayes_dump' }, handler = handler, description = parser._description -}
\ No newline at end of file +} |