Diffstat (limited to 'lualib')
-rw-r--r--  lualib/lua_cache.lua                         475
-rw-r--r--  lualib/lua_cfg_transform.lua                  22
-rw-r--r--  lualib/lua_magic/patterns.lua                 17
-rw-r--r--  lualib/lua_magic/types.lua                     7
-rw-r--r--  lualib/lua_maps.lua                          100
-rw-r--r--  lualib/lua_maps_expressions.lua                2
-rw-r--r--  lualib/lua_mime.lua                          304
-rw-r--r--  lualib/lua_mime_types.lua                      2
-rw-r--r--  lualib/lua_redis.lua                          54
-rw-r--r--  lualib/lua_scanners/cloudmark.lua             56
-rw-r--r--  lualib/lua_util.lua                           51
-rw-r--r--  lualib/plugins/neural.lua                      2
-rw-r--r--  lualib/plugins/rbl.lua                         5
-rw-r--r--  lualib/redis_scripts/bayes_cache_learn.lua     2
-rw-r--r--  lualib/redis_scripts/neural_save_unlock.lua   13
-rw-r--r--  lualib/rspamadm/mime.lua                     409
-rw-r--r--  lualib/rspamadm/statistics_dump.lua           20
17 files changed, 1162 insertions, 379 deletions
diff --git a/lualib/lua_cache.lua b/lualib/lua_cache.lua
new file mode 100644
index 000000000..c87a9dc78
--- /dev/null
+++ b/lualib/lua_cache.lua
@@ -0,0 +1,475 @@
+--[[
+Copyright (c) 2025, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+--[[[
+-- @module lua_cache
+-- This module provides a Redis-based caching API for Rspamd with support for
+-- concurrent operations across multiple workers. It includes features like
+-- distributed locking via PENDING markers, automatic key hashing,
+-- configurable serialization formats, and TTL management.
+--
+@example
+local redis_cache = require "lua_cache"
+local redis_params = require("lua_redis").parse_redis_server('reputation')
+
+-- Create cache context
+local cache_context = redis_cache.create_cache_context(redis_params, {
+ cache_prefix = "rspamd_reputation",
+ cache_ttl = 86400, -- 1 day
+ cache_format = "json",
+ cache_hash_len = 16,
+ cache_use_hashing = true
+})
+
+-- Example usage in a task
+local function process_url_reputation(task, url)
+ local cache_key = url:get_tld()
+
+ -- Try to get data from cache first
+ redis_cache.cache_get(task, cache_key, cache_context, 5.0,
+ -- This callback is called on cache miss
+ function(task)
+ -- Perform expensive reputation lookup
+ local reputation = calculate_reputation(task, url)
+
+ -- Store result in cache for future use
+ redis_cache.cache_set(task, cache_key, {
+ score = reputation.score,
+ categories = reputation.categories,
+ timestamp = os.time()
+ }, cache_context)
+
+ -- Use the result
+ apply_reputation_rules(task, url, reputation)
+ end,
+ -- This callback is called when cache data is available
+ function(task, err, data)
+ if err then
+ logger.errx(task, "Cache error for %s: %s", cache_key, err)
+ return
+ end
+
+ -- Use the cached data
+ apply_reputation_rules(task, url, data)
+ end
+ )
+end
+--]]
+
+local logger = require "rspamd_logger"
+local ucl = require "ucl"
+local lua_util = require "lua_util"
+local rspamd_util = require "rspamd_util"
+local lua_redis = require "lua_redis"
+local hasher = require "rspamd_cryptobox_hash"
+
+local N = "lua_cache"
+local exports = {}
+
+-- Default options
+local default_opts = {
+ cache_prefix = "rspamd_cache",
+ cache_ttl = 3600, -- 1 hour
+ cache_probes = 5, -- Number of times to check a pending key
+ cache_format = "json", -- Serialization format
+ cache_hash_len = 16, -- Number of hex symbols to use for hashed keys
+ cache_use_hashing = false -- Whether to hash keys by default
+}
+
+-- Create a hash of the key using the configured length
+local function hash_key(key, hash_len)
+ local h = hasher.create(key)
+ local hex = h:hex()
+
+ if hash_len and hash_len > 0 and hash_len < #hex then
+ return string.sub(hex, 1, hash_len)
+ end
+
+ return hex
+end
+
+-- Get the appropriate key based on hashing configuration
+local function get_cache_key(raw_key, cache_context, force_hashing)
+ -- Determine whether to hash based on context settings and force parameter
+ local should_hash = force_hashing
+ if should_hash == nil then
+ should_hash = cache_context.opts.cache_use_hashing
+ end
+
+ if should_hash then
+ lua_util.debugm(N, rspamd_config, "hashing key '%s' with hash length %s",
+ raw_key, cache_context.opts.cache_hash_len)
+ return hash_key(raw_key, cache_context.opts.cache_hash_len)
+ else
+ return raw_key
+ end
+end
+
+-- Create a caching context with the provided options
+local function create_cache_context(redis_params, opts, module_name)
+ if not redis_params then
+ return nil, "Redis parameters must be provided"
+ end
+
+ local cache_context = {}
+ cache_context.redis_params = redis_params
+
+ -- Process and merge configuration options
+ cache_context.opts = lua_util.override_defaults(default_opts, opts)
+ cache_context.N = module_name or N
+
+ -- Register Redis prefix
+ lua_redis.register_prefix(cache_context.opts.cache_prefix,
+ "caching",
+ "Cache API prefix")
+
+ lua_util.debugm(N, rspamd_config, "registered redis prefix: %s", cache_context.opts.cache_prefix)
+
+ -- Remove cache related options from opts table
+ if opts then
+ lua_util.debugm(N, rspamd_config, "removing cache options from original opts table")
+ opts.cache_prefix = nil
+ opts.cache_ttl = nil
+ opts.cache_probes = nil
+ opts.cache_format = nil
+ opts.cache_hash_len = nil
+ opts.cache_use_hashing = nil
+ end
+
+ -- Set serialization and deserialization functions
+ if cache_context.opts.cache_format == "messagepack" then
+ lua_util.debugm(cache_context.N, rspamd_config, "using messagepack for serialization")
+
+ cache_context.encode = function(data)
+ return ucl.to_format(data, 'msgpack')
+ end
+
+ cache_context.decode = function(raw_data)
+ local ucl_parser = ucl.parser()
+ local ok, ucl_err = ucl_parser:parse_text(raw_data, 'messagepack')
+ if not ok then
+ lua_util.debugm(cache_context.N, rspamd_config, "failed to parse messagepack data: %s", ucl_err)
+ return nil
+ end
+ return ucl_parser:get_object()
+ end
+ else
+ -- Default to JSON
+ lua_util.debugm(cache_context.N, rspamd_config, "using json for serialization")
+
+ cache_context.encode = function(data)
+ return ucl.to_format(data, 'json')
+ end
+
+ cache_context.decode = function(raw_data)
+ local ucl_parser = ucl.parser()
+ local ok, ucl_err = ucl_parser:parse_text(raw_data)
+ if not ok then
+ lua_util.debugm(cache_context.N, rspamd_config, "failed to parse json data: %s", ucl_err)
+ return nil
+ end
+ return ucl_parser:get_object()
+ end
+ end
+
+ lua_util.debugm(cache_context.N, rspamd_config, "cache context created: %s", cache_context.opts)
+ return cache_context
+end
+
+-- Encode data for storage in Redis with proper formatting
+local function encode_data(data, cache_context)
+ lua_util.debugm(cache_context.N, rspamd_config, "encoding data using %s format", cache_context.opts.cache_format)
+ return cache_context.encode(data)
+end
+
+-- Decode data from Redis with proper formatting
+local function decode_data(data, cache_context)
+ if not data then
+ lua_util.debugm(cache_context.N, rspamd_config, "cannot decode nil data")
+ return nil
+ end
+ lua_util.debugm(cache_context.N, rspamd_config, "decoding data using %s format", cache_context.opts.cache_format)
+ return cache_context.decode(data)
+end
+
+-- Check if a value is a PENDING marker and extract its details
+local function parse_pending_value(value, cache_context)
+ if type(value) ~= 'string' then
+ lua_util.debugm(cache_context.N, rspamd_config, "value is not a string, cannot be a pending marker")
+ return nil
+ end
+
+ -- Check if the value starts with PENDING:
+ if string.sub(value, 1, 8) ~= "PENDING:" then
+ lua_util.debugm(cache_context.N, rspamd_config, "value doesn't start with PENDING: prefix")
+ return nil
+ end
+
+ lua_util.debugm(cache_context.N, rspamd_config, "found PENDING marker, extracting data")
+ local pending_data = string.sub(value, 9)
+ return decode_data(pending_data, cache_context)
+end
+
+-- Create a pending marker with hostname and timeout
+local function create_pending_marker(timeout, cache_context)
+ local hostname = rspamd_util.get_hostname()
+ local pending_data = {
+ hostname = hostname,
+ timeout = timeout,
+ timestamp = os.time()
+ }
+
+ lua_util.debugm(cache_context.N, rspamd_config, "creating PENDING marker for host %s, timeout %s",
+ hostname, timeout)
+
+ return "PENDING:" .. encode_data(pending_data, cache_context)
+end
+
+-- Check cache and handle the result appropriately
+local function cache_get(task, key, cache_context, timeout, callback_uncached, callback_data)
+ if not task or not key or not cache_context or not callback_uncached or not callback_data then
+ logger.errx(task, "missing required parameters for cache_get")
+ return false
+ end
+
+ local full_key = cache_context.opts.cache_prefix .. "_" .. get_cache_key(key, cache_context, false)
+ lua_util.debugm(cache_context.N, task, "cache lookup for key: %s (%s)", key, full_key)
+
+ -- Function to check a pending key
+ local function check_pending(pending_info)
+ local probe_count = 0
+ local probe_interval = timeout / (cache_context.opts.cache_probes or 5)
+
+ lua_util.debugm(cache_context.N, task, "setting up probes for pending key %s, interval: %s seconds",
+ full_key, probe_interval)
+
+ -- Set up a timer to probe the key
+ local function probe_key()
+ probe_count = probe_count + 1
+ lua_util.debugm(cache_context.N, task, "probe #%s/%s for pending key %s",
+ probe_count, cache_context.opts.cache_probes, full_key)
+
+ if probe_count >= cache_context.opts.cache_probes then
+ logger.infox(task, "maximum probes reached for key %s, considering it failed", full_key)
+ lua_util.debugm(cache_context.N, task, "maximum probes reached for key %s, giving up", full_key)
+ callback_data(task, "timeout waiting for pending key", nil)
+ return
+ end
+
+ lua_util.debugm(cache_context.N, task, "probing redis for key %s", full_key)
+ lua_redis.redis_make_request(task, cache_context.redis_params, key, false,
+ function(err, data)
+ if err then
+ logger.errx(task, "redis error while probing key %s: %s", full_key, err)
+ lua_util.debugm(cache_context.N, task, "redis error during probe: %s, retrying later", err)
+ task:add_timer(probe_interval, probe_key)
+ return
+ end
+
+ if not data or type(data) == 'userdata' then
+ lua_util.debugm(cache_context.N, task, "pending key %s disappeared, calling uncached handler", full_key)
+ callback_uncached(task)
+ return
+ end
+
+ local pending = parse_pending_value(data, cache_context)
+ if pending then
+ lua_util.debugm(cache_context.N, task, "key %s still pending (host: %s), retrying later",
+ full_key, pending.hostname)
+ task:add_timer(probe_interval, probe_key)
+ else
+ lua_util.debugm(cache_context.N, task, "pending key %s resolved to actual data", full_key)
+ callback_data(task, nil, decode_data(data, cache_context))
+ end
+ end,
+ 'GET', { full_key }
+ )
+ end
+
+ -- Start the first probe after the initial probe interval
+ lua_util.debugm(cache_context.N, task, "scheduling first probe for %s in %s seconds",
+ full_key, probe_interval)
+ task:add_timer(probe_interval, probe_key)
+ end
+
+ -- Initial cache lookup
+ lua_util.debugm(cache_context.N, task, "making initial redis GET request for key: %s", full_key)
+ lua_redis.redis_make_request(task, cache_context.redis_params, key, false,
+ function(err, data)
+ if err then
+ logger.errx(task, "redis error looking up key %s: %s", full_key, err)
+ lua_util.debugm(cache_context.N, task, "redis error: %s, calling uncached handler", err)
+ callback_uncached(task)
+ return
+ end
+
+ if not data or type(data) == 'userdata' then
+ -- Key not found, set pending and call the uncached callback
+ lua_util.debugm(cache_context.N, task, "key %s not found in cache, creating pending marker", full_key)
+ local pending_marker = create_pending_marker(timeout, cache_context)
+
+ lua_util.debugm(cache_context.N, task, "setting pending marker for key %s with TTL %s",
+ full_key, timeout * 2)
+ lua_redis.redis_make_request(task, cache_context.redis_params, key, true,
+ function(set_err, set_data)
+ if set_err then
+ logger.errx(task, "redis error setting pending marker for %s: %s", full_key, set_err)
+ lua_util.debugm(cache_context.N, task, "failed to set pending marker: %s", set_err)
+ else
+ lua_util.debugm(cache_context.N, task, "successfully set pending marker for %s", full_key)
+ end
+ lua_util.debugm(cache_context.N, task, "calling uncached handler for %s", full_key)
+ callback_uncached(task)
+ end,
+ 'SETEX', { full_key, tostring(timeout * 2), pending_marker }
+ )
+ else
+ -- Key found, check if it's a pending marker or actual data
+ local pending = parse_pending_value(data, cache_context)
+
+ if pending then
+ -- Key is being processed by another worker
+ lua_util.debugm(cache_context.N, task, "key %s is pending on host %s, waiting for result",
+ full_key, pending.hostname)
+ check_pending(pending)
+ else
+ -- Extend TTL and return data
+ lua_util.debugm(cache_context.N, task, "found cached data for key %s, extending TTL to %s",
+ full_key, cache_context.opts.cache_ttl)
+ lua_redis.redis_make_request(task, cache_context.redis_params, key, true,
+ function(expire_err, _)
+ if expire_err then
+ logger.errx(task, "redis error extending TTL for %s: %s", full_key, expire_err)
+ lua_util.debugm(cache_context.N, task, "failed to extend TTL: %s", expire_err)
+ else
+ lua_util.debugm(cache_context.N, task, "successfully extended TTL for %s", full_key)
+ end
+ end,
+ 'EXPIRE', { full_key, tostring(cache_context.opts.cache_ttl) }
+ )
+
+ lua_util.debugm(cache_context.N, task, "returning cached data for key %s", full_key)
+ callback_data(task, nil, decode_data(data, cache_context))
+ end
+ end
+ end,
+ 'GET', { full_key }
+ )
+
+ return true
+end
+
+-- Save data to the cache
+local function cache_set(task, key, data, cache_context)
+ if not task or not key or not data or not cache_context then
+ logger.errx(task, "missing required parameters for cache_set")
+ return false
+ end
+
+ local full_key = cache_context.opts.cache_prefix .. "_" .. get_cache_key(key, cache_context, false)
+ lua_util.debugm(cache_context.N, task, "caching data for key: %s (%s) with TTL: %s",
+ full_key, key, cache_context.opts.cache_ttl)
+
+ local encoded_data = encode_data(data, cache_context)
+
+ -- Store the data with expiration
+ lua_util.debugm(cache_context.N, task, "making redis SETEX request for key: %s", full_key)
+ return lua_redis.redis_make_request(task, cache_context.redis_params, key, true,
+ function(err, result)
+ if err then
+ logger.errx(task, "redis error setting cached data for %s: %s", full_key, err)
+ lua_util.debugm(cache_context.N, task, "failed to cache data: %s", err)
+ else
+ lua_util.debugm(cache_context.N, task, "successfully cached data for key %s", full_key)
+ end
+ end,
+ 'SETEX', { full_key, tostring(cache_context.opts.cache_ttl), encoded_data }
+ )
+end
+
+-- Delete a cache entry
+local function cache_del(task, key, cache_context)
+ if not task or not key or not cache_context then
+ logger.errx(task, "missing required parameters for cache_del")
+ return false
+ end
+
+ local full_key = cache_context.opts.cache_prefix .. "_" .. get_cache_key(key, cache_context, false)
+ lua_util.debugm(cache_context.N, task, "deleting cache key: %s", full_key)
+
+ return lua_redis.redis_make_request(task, cache_context.redis_params, key, true,
+ function(err, result)
+ if err then
+ logger.errx(task, "redis error deleting cache key %s: %s", full_key, err)
+ lua_util.debugm(cache_context.N, task, "failed to delete cache key: %s", err)
+ else
+ local count = tonumber(result) or 0
+ lua_util.debugm(cache_context.N, task, "successfully deleted cache key %s (%s keys removed)",
+ full_key, count)
+ end
+ end,
+ 'DEL', { full_key }
+ )
+end
+
+-- Export the API functions
+---[[[
+-- @function lua_cache.create_cache_context(redis_params, opts, module_name)
+-- Creates a Redis caching context with specified parameters and options
+-- @param {table} redis_params Redis connection parameters (required)
+-- @param {table} opts Optional configuration parameters:
+-- * `cache_prefix`: Key prefix for Redis (default: "rspamd_cache")
+-- * `cache_ttl`: TTL in seconds for cached entries (default: 3600)
+-- * `cache_probes`: Number of times to check pending keys (default: 5)
+-- * `cache_format`: Serialization format - "json" or "messagepack" (default: "json")
+-- * `cache_hash_len`: Number of hex symbols for hashed keys (default: 16)
+-- * `cache_use_hashing`: Whether to hash keys by default (default: false)
+-- @return {table} Cache context or nil + error message on failure
+--]]
+exports.create_cache_context = create_cache_context
+---[[[
+-- @function lua_cache.cache_get(task, key, cache_context, timeout, callback_uncached, callback_data)
+-- Retrieves data from cache, handling pending states and cache misses appropriately
+-- @param {rspamd_task} task Current task (required)
+-- @param {string} key Cache key (required)
+-- @param {table} cache_context Redis cache context from create_cache_context (required)
+-- @param {number} timeout Timeout for pending operations in seconds (required)
+-- @param {function} callback_uncached Function to call on cache miss: callback_uncached(task) (required)
+-- @param {function} callback_data Function to call when data is available: callback_data(task, err, data) (required)
+-- @return {boolean} true if request was initiated successfully, false otherwise
+--]]
+exports.cache_get = cache_get
+---[[[
+-- @function lua_cache.cache_set(task, key, data, cache_context)
+-- Stores data in the cache with the configured TTL
+-- @param {rspamd_task} task Current task (required)
+-- @param {string} key Cache key (required)
+-- @param {table} data Data to store in the cache (required)
+-- @param {table} cache_context Redis cache context from create_cache_context (required)
+-- @return {boolean} true if request was initiated successfully, false otherwise
+--]]
+exports.cache_set = cache_set
+---[[[
+-- @function lua_cache.cache_del(task, key, cache_context)
+-- Deletes data from the cache
+-- @param {rspamd_task} task Current task (required)
+-- @param {string} key Cache key (required)
+-- @param {table} cache_context Redis cache context from create_cache_context (required)
+-- @return {boolean} true if request was initiated successfully, false otherwise
+--]]
+exports.cache_del = cache_del
+
+return exports
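A minimal follow-up sketch for the rest of the exported API (the optional `module_name` argument and `cache_del`); the prefix and key below are illustrative, and `redis_params`/`task` are assumed to come from the calling module as in the example above.

local redis_cache = require "lua_cache"

-- the third argument sets the logging module name used in debug messages
local ctx = redis_cache.create_cache_context(redis_params, {
  cache_prefix = "my_module_cache",   -- illustrative prefix
  cache_format = "messagepack",       -- store values as msgpack instead of json
}, "my_module")

-- drop a previously cached entry; the result of the DEL is only logged
redis_cache.cache_del(task, "example.com", ctx)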
diff --git a/lualib/lua_cfg_transform.lua b/lualib/lua_cfg_transform.lua
index 265ca34c0..ec11ef299 100644
--- a/lualib/lua_cfg_transform.lua
+++ b/lualib/lua_cfg_transform.lua
@@ -198,20 +198,22 @@ end
local function symbol_transform(cfg, k, v)
local groups = cfg:at('group')
- -- first try to find any group where there is a definition of this symbol
- for gr_n, gr in groups:pairs() do
- local symbols = gr:at('symbols')
- if symbols and symbols:at(k) then
- -- We override group symbol with ungrouped symbol
- logger.infox("overriding group symbol %s in the group %s", k, gr_n)
- symbols[k] = lua_util.override_defaults(symbols:at(k):unwrap(), v:unwrap())
- return
+ if groups then
+ -- first try to find any group where there is a definition of this symbol
+ for gr_n, gr in groups:pairs() do
+ local symbols = gr:at('symbols')
+ if symbols and symbols:at(k) then
+ -- We override group symbol with ungrouped symbol
+ logger.infox("overriding group symbol %s in the group %s", k, gr_n)
+ symbols[k] = lua_util.override_defaults(symbols:at(k):unwrap(), v:unwrap())
+ return
+ end
end
end
-- Now check what Rspamd knows about this symbol
local sym = rspamd_config:get_symbol(k)
- if not sym or not sym.group then
+ if groups and (not sym or not sym.group) then
-- Otherwise we just use group 'ungrouped'
if not groups:at('ungrouped') then
groups.ungrouped = {
@@ -374,7 +376,7 @@ return function(cfg)
local next_act = actions_order[j]
if actions:at(next_act) and actions:at(next_act):type() == 'number' then
local next_score = actions:at(next_act):unwrap()
- if next_score <= score then
+ if type(score) == 'number' and type(next_score) == 'number' and next_score <= score then
logger.errx(rspamd_config, 'invalid actions thresholds order: action %s (%s) must have lower ' ..
'score than action %s (%s)', act, score, next_act, next_score)
ret = false
diff --git a/lualib/lua_magic/patterns.lua b/lualib/lua_magic/patterns.lua
index 971ddd95f..4a5abd8ce 100644
--- a/lualib/lua_magic/patterns.lua
+++ b/lualib/lua_magic/patterns.lua
@@ -466,6 +466,23 @@ local patterns = {
},
}
},
+ heic = {
+ matches = {
+ {
+ -- HEIC/HEIF file format signature
+ -- Starts with ftyp followed by specific brand identifiers
+ string = "^....ftyphe[im][cs]",
+ position = 12,
+ weight = 60,
+ },
+ {
+ -- Alternative signature for HEIC/HEIF
+ string = [[^....ftypmif1]],
+ position = 12,
+ weight = 60,
+ },
+ }
+ },
}
return patterns
diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index 3dce2e1f8..ad4ae4349 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -279,6 +279,11 @@ local types = {
ct = 'image/bmp',
av_check = false,
},
+ heic = {
+ type = 'image',
+ ct = 'image/heic',
+ av_check = false,
+ },
dwg = {
type = 'image',
ct = 'image/vnd.dwg',
@@ -324,4 +329,4 @@ local types = {
},
}
-return types
\ No newline at end of file
+return types
diff --git a/lualib/lua_maps.lua b/lualib/lua_maps.lua
index 2699ea214..c45b51b97 100644
--- a/lualib/lua_maps.lua
+++ b/lualib/lua_maps.lua
@@ -88,16 +88,64 @@ end
local external_map_schema = ts.shape {
external = ts.equivalent(true), -- must be true
- backend = ts.string, -- where to get data, required
- method = ts.one_of { "body", "header", "query" }, -- how to pass input
+ backend = ts.string:is_optional(), -- where to get data, required for HTTP
+ cdb = ts.string:is_optional(), -- path to CDB file, required for CDB
+ method = ts.one_of { "body", "header", "query" }:is_optional(), -- how to pass input
encode = ts.one_of { "json", "messagepack" }:is_optional(), -- how to encode input (if relevant)
timeout = (ts.number + ts.string / lua_util.parse_time_interval):is_optional(),
}
+-- Storage for CDB instances
+local cdb_maps = {}
+local cdb_finisher_set = false
+
local rspamd_http = require "rspamd_http"
local ucl = require "ucl"
+-- Function to handle CDB maps
+local function handle_cdb_map(map_config, key, callback, task)
+ local rspamd_cdb = require "rspamd_cdb"
+ local hash_key = map_config.cdb
+
+ -- Check if we need to open the CDB file
+ if not cdb_maps[hash_key] then
+ local cdb_file = map_config.cdb
+ -- Provide ev_base to monitor changes
+ local cdb_handle = rspamd_cdb.open(cdb_file, task:get_ev_base())
+
+ if not cdb_handle then
+ local err_msg = string.format("Failed to open CDB file: %s", cdb_file)
+ rspamd_logger.errx(task, err_msg)
+ if callback then
+ callback(false, err_msg, 500, task)
+ end
+ return nil
+ else
+ cdb_maps[hash_key] = cdb_handle
+ end
+ end
+
+ -- Look up the key in CDB
+ local result = cdb_maps[hash_key]:find(key)
+
+ if callback then
+ if result then
+ callback(true, result, 200, task)
+ else
+ callback(false, 'not found', 404, task)
+ end
+ return nil
+ end
+
+ return result
+end
+
local function query_external_map(map_config, upstreams, key, callback, task)
+ -- Check if this is a CDB map
+ if map_config.cdb then
+ return handle_cdb_map(map_config, key, callback, task)
+ end
+ -- Fallback to HTTP
local http_method = (map_config.method == 'body' or map_config.method == 'form') and 'POST' or 'GET'
local upstream = upstreams:get_upstream_round_robin()
local http_headers = {
@@ -138,7 +186,8 @@ local function query_external_map(map_config, upstreams, key, callback, task)
local params_table = {}
for k, v in pairs(key) do
if type(v) == 'string' then
- table.insert(params_table, string.format('%s=%s', lua_util.url_encode_string(k), lua_util.url_encode_string(v)))
+ table.insert(params_table,
+ string.format('%s=%s', lua_util.url_encode_string(k), lua_util.url_encode_string(v)))
end
end
url = string.format('%s?%s', url, table.concat(params_table, '&'))
@@ -305,7 +354,7 @@ local function rspamd_map_add_from_ucl(opt, mtype, description, callback)
if string.find(opt[1], '^%d') then
-- List of numeric stuff (hope it's ipnets definitions)
- local map = rspamd_config:radix_from_ucl(opt)
+ local map = rspamd_config:radix_from_ucl(opt, description)
if map then
ret.__data = map
@@ -448,17 +497,39 @@ local function rspamd_map_add_from_ucl(opt, mtype, description, callback)
local parse_res, parse_err = external_map_schema(opt)
if parse_res then
- ret.__upstreams = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), opt.backend)
- if ret.__upstreams then
+ if opt.cdb then
ret.__data = opt
ret.__external = true
setmetatable(ret, ret_mt)
maybe_register_selector()
+ if not cdb_finisher_set then
+ -- Register a finalize script to close all CDB handles when Rspamd stops
+ rspamd_config:register_finish_script(function()
+ for path, _ in pairs(cdb_maps) do
+ rspamd_logger.infox(rspamd_config, 'closing CDB map: %s', path)
+ cdb_maps[path] = nil
+ end
+ end)
+ cdb_finisher_set = true
+ end
+
return ret
+ elseif opt.backend then
+ ret.__upstreams = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), opt.backend)
+ if ret.__upstreams then
+ ret.__data = opt
+ ret.__external = true
+ setmetatable(ret, ret_mt)
+ maybe_register_selector()
+
+ return ret
+ else
+ rspamd_logger.errx(rspamd_config, 'cannot parse external map upstreams: %s',
+ opt.backend)
+ end
else
- rspamd_logger.errx(rspamd_config, 'cannot parse external map upstreams: %s',
- opt.backend)
+ rspamd_logger.errx(rspamd_config, 'external map requires either "cdb" or "backend" parameter')
end
else
rspamd_logger.errx(rspamd_config, 'cannot parse external map: %s',
@@ -526,15 +597,12 @@ local function rspamd_maybe_check_map(key, what)
return rspamd_maybe_check_map(key, elt)
end, what)
end
- if type(rspamd_maps) == "table" then
- local mn
- if starts(key, "map:") then
- mn = string.sub(key, 5)
- elseif starts(key, "map://") then
- mn = string.sub(key, 7)
+ if type(rspamd_maps) == "table" and starts(key, "map:") then
+ local mn = string.sub(key, 5)
+ if starts(mn, "//") then
+ mn = string.sub(mn, 3)
end
-
- if mn and rspamd_maps[mn] then
+ if rspamd_maps[mn] then
return rspamd_maps[mn]:get_key(what)
end
end
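For context, a hedged sketch of a CDB-backed external map under the extended schema above; the map type string and file path are illustrative, and the lookup callback shape mirrors the one used in handle_cdb_map.

local lua_maps = require "lua_maps"

-- 'external = true' plus 'cdb' selects the CDB branch instead of the HTTP backend
local rep_map = lua_maps.map_add_from_ucl({
  external = true,
  cdb = '/var/lib/rspamd/reputation.cdb',   -- illustrative path
}, 'string', 'CDB-backed reputation data')

-- expected lookup shape (callback receives ok, data, code, task):
-- rep_map:get_key('example.com', function(ok, data, code, tsk) ... end, task)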
diff --git a/lualib/lua_maps_expressions.lua b/lualib/lua_maps_expressions.lua
index 996de99c0..2ad9ad1d8 100644
--- a/lualib/lua_maps_expressions.lua
+++ b/lualib/lua_maps_expressions.lua
@@ -155,7 +155,7 @@ local function create(cfg, obj, module_name)
end
end
local map = lua_maps.map_add_from_ucl(rule.map, rule.type,
- obj.description or module_name)
+ rule.description or obj.description or module_name)
if not map then
rspamd_logger.errx(cfg, 'cannot add map for element %s in module %s',
name, module_name)
diff --git a/lualib/lua_mime.lua b/lualib/lua_mime.lua
index f68758ec9..c85f35066 100644
--- a/lualib/lua_mime.lua
+++ b/lualib/lua_mime.lua
@@ -158,13 +158,21 @@ exports.add_text_footer = function(task, html_footer, text_footer)
local cur_boundary
for _, part in ipairs(task:get_parts()) do
local boundary = part:get_boundary()
+ local part_ct = part:get_header('Content-Type')
+ if part_ct then
+ part_ct = rspamd_util.parse_content_type(part_ct, task:get_mempool())
+ end
if part:is_multipart() then
if cur_boundary then
out[#out + 1] = string.format('--%s',
- boundaries[#boundaries])
+ boundaries[#boundaries].boundary)
end
- boundaries[#boundaries + 1] = boundary or '--XXX'
+ boundaries[#boundaries + 1] = {
+ boundary = boundary or '--XXX',
+ ct_type = part_ct.type or '',
+ ct_subtype = part_ct.subtype or '',
+ }
cur_boundary = boundary
local rh = part:get_raw_headers()
@@ -176,7 +184,7 @@ exports.add_text_footer = function(task, html_footer, text_footer)
if cur_boundary and boundary ~= cur_boundary then
-- Need to close boundary
out[#out + 1] = string.format('--%s--%s',
- boundaries[#boundaries], newline_s)
+ boundaries[#boundaries].boundary, newline_s)
table.remove(boundaries)
cur_boundary = nil
end
@@ -218,7 +226,13 @@ exports.add_text_footer = function(task, html_footer, text_footer)
if cur_boundary and boundary ~= cur_boundary then
-- Need to close boundary
out[#out + 1] = string.format('--%s--%s',
- boundaries[#boundaries], newline_s)
+ boundaries[#boundaries].boundary, newline_s)
+ -- Need to close previous boundary, if ct_subtype is related
+ if #boundaries > 1 and boundaries[#boundaries].ct_type == "multipart" and boundaries[#boundaries].ct_subtype == "related" then
+ out[#out + 1] = string.format('--%s--%s',
+ boundaries[#boundaries -1].boundary, newline_s)
+ table.remove(boundaries)
+ end
table.remove(boundaries)
cur_boundary = boundary
end
@@ -239,7 +253,7 @@ exports.add_text_footer = function(task, html_footer, text_footer)
-- Close remaining
local b = table.remove(boundaries)
while b do
- out[#out + 1] = string.format('--%s--', b)
+ out[#out + 1] = string.format('--%s--', b.boundary)
if #boundaries > 0 then
out[#out + 1] = ''
end
@@ -1130,92 +1144,230 @@ exports.anonymize_message = function(task, settings)
local sel_part = exports.get_displayed_text_part(task)
- if sel_part then
- text_content = sel_part:get_words('norm')
- for i, w in ipairs(text_content) do
- if exclude_words_re:match(w) then
- text_content[i] = string.rep('x', #w)
+ if sel_part and settings.gpt then
+ -- LLM version
+ local gpt_settings = rspamd_config:get_all_opt('gpt')
+
+ if not gpt_settings then
+ logger.errx(task, 'no gpt settings found')
+
+ return false
+ end
+
+ -- Prepare the LLM request
+ local function send_to_llm(input_content)
+ local rspamd_http = require 'rspamd_http'
+ -- settings for LLM API
+ local llm_settings = lua_util.override_defaults(gpt_settings, {
+ api_key = settings.api_key,
+ model = settings.model,
+ timeout = settings.timeout,
+ url = settings.url,
+ })
+ -- Do not use prompt settings from the module
+ llm_settings.prompt = settings.gpt_prompt or 'Remove all personal data from the following email ' ..
+ 'and return just the anonymized content'
+
+ local request_body = {
+ model = llm_settings.model,
+ max_tokens = llm_settings.max_tokens,
+ temperature = 0,
+ messages = {
+ {
+ role = 'system',
+ content = llm_settings.prompt
+ },
+ {
+ role = 'user',
+ content = input_content
+ }
+ }
+ }
+
+ if llm_settings.type == 'ollama' then
+ request_body.stream = false
+ end
+
+ -- Make the HTTP request to the LLM API
+ local http_params = {
+ url = llm_settings.url,
+ headers = {
+ ['Authorization'] = 'Bearer ' .. llm_settings.api_key,
+ ['Content-Type'] = 'application/json'
+ },
+ body = ucl.to_format(request_body, 'json-compact'),
+ method = 'POST',
+ task = task,
+ timeout = llm_settings.timeout,
+ }
+ local err, data = rspamd_http.request(http_params)
+
+ if err then
+ logger.errx(task, 'LLM request failed: %s', err)
+ return
+ end
+
+ local parser = ucl.parser()
+ local res, parse_err = parser:parse_string(data.content)
+ if not res then
+ logger.errx(task, 'Cannot parse LLM response: %s', parse_err)
+ return
+ end
+
+ local reply = parser:get_object()
+ local anonymized_content
+ if llm_settings.type == 'openai' then
+ anonymized_content = reply.choices and reply.choices[1] and reply.choices[1].message and reply.choices[1].message.content
+ elseif llm_settings.type == 'ollama' then
+ anonymized_content = reply.message.content
+ end
+ if anonymized_content then
+ -- Replace the original content with the anonymized content
+ -- sel_part:set_content(anonymized_content) -- Not available, so rebuild message instead
+
+ -- Create new message with anonymized content
+ local cur_boundary = '--XXX'
+
+ -- Add headers
+ out[#out + 1] = {
+ string.format('Content-Type: multipart/mixed; boundary="%s"', cur_boundary),
+ true
+ }
+ for _, hdr in ipairs(modified_headers) do
+ if hdr.name:lower() ~= 'content-type' then
+ out[#out + 1] = {
+ string.format('%s: %s', hdr.name, hdr.value),
+ true
+ }
+ end
+ end
+ out[#out + 1] = { '', true }
+
+ -- Add text part with anonymized content
+ out[#out + 1] = {
+ string.format('--%s', cur_boundary),
+ true
+ }
+ out[#out + 1] = {
+ 'Content-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: quoted-printable',
+ true
+ }
+ out[#out + 1] = { '', true }
+ out[#out + 1] = {
+ rspamd_util.encode_qp(anonymized_content, 76, task:get_newlines_type()),
+ true
+ }
+
+ -- Close boundaries
+ out[#out + 1] = {
+ string.format('--%s--', cur_boundary),
+ true
+ }
+
+ state.out = out
+ state.need_rewrite_ct = true
+ state.new_ct = {
+ type = 'multipart',
+ subtype = 'mixed'
+ }
+
+ return state
end
+
+ return false
end
- end
- -- Process URLs
- local function process_url(url)
- local clean_url = url:get_host()
- local path = url:get_path()
- if path and path ~= "/" then
- clean_url = string.format("%s/%s", clean_url, path)
+ -- Send content to LLM
+ return send_to_llm(sel_part:get_content())
+ else
+
+ if sel_part then
+ text_content = sel_part:get_words('norm')
+ for i, w in ipairs(text_content) do
+ if exclude_words_re:match(w) then
+ text_content[i] = string.rep('x', #w)
+ end
+ end
end
- return string.format('https://%s', clean_url)
- end
- for _, url in ipairs(task:get_urls(true)) do
- urls[process_url(url)] = true
- end
+ -- Process URLs
+ local function process_url(url)
+ local clean_url = url:get_host()
+ local path = url:get_path()
+ if path and path ~= "/" then
+ clean_url = string.format("%s/%s", clean_url, path)
+ end
+ return string.format('https://%s', clean_url)
+ end
- -- Process emails
- local function process_email(email)
- return string.format('nobody@%s', email.domain or 'example.com')
- end
+ for _, url in ipairs(task:get_urls(true)) do
+ urls[process_url(url)] = true
+ end
- for _, email in ipairs(task:get_emails()) do
- emails[process_email(email)] = true
- end
+ -- Process emails
+ local function process_email(email)
+ return string.format('nobody@%s', email.domain or 'example.com')
+ end
+
+ for _, email in ipairs(task:get_emails()) do
+ emails[process_email(email)] = true
+ end
- -- Construct new message
- table.insert(text_content, '\nurls:')
- table.insert(text_content, table.concat(lua_util.keys(urls), ', '))
- table.insert(text_content, '\nemails:')
- table.insert(text_content, table.concat(lua_util.keys(emails), ', '))
- local new_text = table.concat(text_content, ' ')
+ -- Construct new message
+ table.insert(text_content, '\nurls:')
+ table.insert(text_content, table.concat(lua_util.keys(urls), ', '))
+ table.insert(text_content, '\nemails:')
+ table.insert(text_content, table.concat(lua_util.keys(emails), ', '))
+ local new_text = table.concat(text_content, ' ')
- -- Create new message structure
- local cur_boundary = '--XXX'
+ -- Create new message structure
+ local cur_boundary = '--XXX'
- -- Add headers
- out[#out + 1] = {
- string.format('Content-Type: multipart/mixed; boundary="%s"', cur_boundary),
- true
- }
- for _, hdr in ipairs(modified_headers) do
- if hdr.name ~= 'Content-Type' then
- out[#out + 1] = {
- string.format('%s: %s', hdr.name, hdr.value),
- true
- }
+ -- Add headers
+ out[#out + 1] = {
+ string.format('Content-Type: multipart/mixed; boundary="%s"', cur_boundary),
+ true
+ }
+ for _, hdr in ipairs(modified_headers) do
+ if hdr.name ~= 'Content-Type' then
+ out[#out + 1] = {
+ string.format('%s: %s', hdr.name, hdr.value),
+ true
+ }
+ end
end
- end
- out[#out + 1] = { '', true }
+ out[#out + 1] = { '', true }
- -- Add text part
- out[#out + 1] = {
- string.format('--%s', cur_boundary),
- true
- }
- out[#out + 1] = {
- 'Content-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: quoted-printable',
- true
- }
- out[#out + 1] = { '', true }
- out[#out + 1] = {
- rspamd_util.encode_qp(new_text, 76, task:get_newlines_type()),
- true
- }
+ -- Add text part
+ out[#out + 1] = {
+ string.format('--%s', cur_boundary),
+ true
+ }
+ out[#out + 1] = {
+ 'Content-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: quoted-printable',
+ true
+ }
+ out[#out + 1] = { '', true }
+ out[#out + 1] = {
+ rspamd_util.encode_qp(new_text, 76, task:get_newlines_type()),
+ true
+ }
- -- Close boundaries
- out[#out + 1] = {
- string.format('--%s--', cur_boundary),
- true
- }
+ -- Close boundaries
+ out[#out + 1] = {
+ string.format('--%s--', cur_boundary),
+ true
+ }
- state.out = out
- state.need_rewrite_ct = true
- state.new_ct = {
- type = 'multipart',
- subtype = 'mixed'
- }
+ state.out = out
+ state.need_rewrite_ct = true
+ state.new_ct = {
+ type = 'multipart',
+ subtype = 'mixed'
+ }
- return state
+ return state
+ end
end
return exports
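A hedged sketch of driving the new LLM branch of anonymize_message; field names follow the settings read above, while the model value is illustrative and the 'gpt' module is assumed to be configured (its options are merged in as defaults).

local lua_mime = require "lua_mime"

local state = lua_mime.anonymize_message(task, {
  gpt = true,              -- take the LLM path instead of word masking
  model = 'gpt-4o-mini',   -- illustrative; any model accepted by the gpt module
  gpt_prompt = 'Remove all personal data from the following email ' ..
      'and return just the anonymized content',
})
if state then
  -- state.out holds the rebuilt multipart/mixed message, state.new_ct the new Content-Type
end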
diff --git a/lualib/lua_mime_types.lua b/lualib/lua_mime_types.lua
index ba55f9740..7b6688b3c 100644
--- a/lualib/lua_mime_types.lua
+++ b/lualib/lua_mime_types.lua
@@ -214,7 +214,7 @@ exports.full_extensions_map = {
{ "hxw", "application/octet-stream" },
{ "hxx", "text/plain" },
{ "i", "text/plain" },
- { "ico", "image/x-icon" },
+ { "ico", {"image/x-icon", "image/vnd.microsoft.icon"} },
{ "ics", { "text/calendar", "application/ics", "application/octet-stream" } },
{ "idl", "text/plain" },
{ "ief", "image/ief" },
diff --git a/lualib/lua_redis.lua b/lualib/lua_redis.lua
index 48ea1b6ed..195b7759f 100644
--- a/lualib/lua_redis.lua
+++ b/lualib/lua_redis.lua
@@ -26,7 +26,7 @@ local N = "lua_redis"
local db_schema = (ts.number / tostring + ts.string):is_optional():describe("Database number")
local common_schema = {
- timeout = (ts.number + ts.string / lutil.parse_time_interval):is_optional():describe("Connection timeout"),
+ timeout = (ts.number + ts.string / lutil.parse_time_interval):is_optional():describe("Connection timeout (seconds)"),
db = db_schema,
database = db_schema,
dbname = db_schema,
@@ -40,6 +40,7 @@ local common_schema = {
sentinel_master_maxerrors = (ts.number + ts.string / tonumber):is_optional():describe("Sentinel master max errors"),
sentinel_username = ts.string:is_optional():describe("Sentinel username"),
sentinel_password = ts.string:is_optional():describe("Sentinel password"),
+ redis_version = (ts.number + ts.string / tonumber):is_optional():describe("Redis server version (6 or 7)"),
}
local read_schema = lutil.table_merge({
@@ -357,6 +358,10 @@ local function process_redis_opts(options, redis_params)
redis_params['prefix'] = options['prefix']
end
+ if options['redis_version'] and not redis_params['redis_version'] then
+ redis_params['redis_version'] = tonumber(options['redis_version'])
+ end
+
if type(options['expand_keys']) == 'boolean' then
redis_params['expand_keys'] = options['expand_keys']
else
@@ -1124,9 +1129,9 @@ local function redis_make_request_taskless(ev_base, cfg, redis_params, key,
end
--[[[
--- @function lua_redis.redis_make_request_taskless(ev_base, redis_params, key, is_write, callback, command, args)
+-- @function lua_redis.redis_make_request_taskless(ev_base, cfg, redis_params, key, is_write, callback, command, args)
-- Sends a request to Redis in context where `task` is not available for some specific use-cases
--- Identical to redis_make_request() except in that first parameter is an `event base` object
+-- Identical to redis_make_request() except that the first parameter is an `event base` object and the second one is the 'config' object
--]]
exports.rspamd_redis_make_request_taskless = redis_make_request_taskless
@@ -1202,15 +1207,13 @@ local function prepare_redis_call(script)
return options
end
-local function is_all_servers_ready(script)
+local function is_any_server_ready(script)
for _, s in ipairs(script.servers_ready) do
- if s == "unsent" or s == "tempfail" then
- return false
+ if s == "done" then
+ return true
end
end
-
- -- We assume that permanent errors are not recoverable, so we will just skip those servers
- return true
+ return false
end
local function is_all_servers_failed(script)
@@ -1264,7 +1267,7 @@ local function load_script_task(script, task, is_write)
script.sha = data -- We assume that sha is the same on all servers
script.servers_ready[idx] = "done"
end
- if is_all_servers_ready(script) then
+ if is_any_server_ready(script) then
script_set_loaded(script)
elseif is_all_servers_failed(script) then
script.pending_upload = false
@@ -1282,7 +1285,7 @@ local function load_script_task(script, task, is_write)
end
end
- if is_all_servers_ready(script) then
+ if is_any_server_ready(script) then
script_set_loaded(script)
elseif is_all_servers_failed(script) then
script.pending_upload = false
@@ -1309,7 +1312,6 @@ local function load_script_taskless(script, cfg, ev_base, is_write)
err, script.caller.short_src, script.caller.currentline)
opt.upstream:fail()
script.servers_ready[idx] = "failed"
- return
else
-- Assume temporary error
logger.infox(cfg, 'temporary error uploading script %s to %s: %s; registered from: %s:%s',
@@ -1317,7 +1319,6 @@ local function load_script_taskless(script, cfg, ev_base, is_write)
opt.upstream:get_addr():to_string(true),
err, script.caller.short_src, script.caller.currentline)
script.servers_ready[idx] = "tempfail"
- return
end
else
opt.upstream:ok()
@@ -1330,7 +1331,7 @@ local function load_script_taskless(script, cfg, ev_base, is_write)
script.servers_ready[idx] = "done"
end
- if is_all_servers_ready(script) then
+ if is_any_server_ready(script) then
script_set_loaded(script)
elseif is_all_servers_failed(script) then
script.pending_upload = false
@@ -1348,7 +1349,7 @@ local function load_script_taskless(script, cfg, ev_base, is_write)
end
end
- if is_all_servers_ready(script) then
+ if is_any_server_ready(script) then
script_set_loaded(script)
elseif is_all_servers_failed(script) then
script.pending_upload = false
@@ -1477,6 +1478,10 @@ local function exec_redis_script(id, params, callback, keys, args)
script.sha = nil
script.loaded = nil
script.pending_upload = true
+ -- We must initialize all servers as we don't know here which one failed
+ for i, _ in ipairs(script.servers_ready) do
+ script.servers_ready[i] = "unsent"
+ end
-- Reload scripts if this has not been initiated yet
if params.task then
load_script_task(script, params.task)
@@ -1510,15 +1515,20 @@ local function exec_redis_script(id, params, callback, keys, args)
end
end
+ local redis_command = 'EVALSHA'
+ if not params.is_write and script.redis_params.redis_version and
+ script.redis_params.redis_version >= 7 then
+ redis_command = 'EVALSHA_RO'
+ end
if params.task then
if not rspamd_redis_make_request(params.task, script.redis_params,
- params.key, params.is_write, redis_cb, 'EVALSHA', redis_args) then
+ params.key, params.is_write, redis_cb, redis_command, redis_args) then
callback('Cannot make redis request', nil)
end
else
if not redis_make_request_taskless(params.ev_base, rspamd_config,
script.redis_params,
- params.key, params.is_write, redis_cb, 'EVALSHA', redis_args) then
+ params.key, params.is_write, redis_cb, redis_command, redis_args) then
callback('Cannot make redis request', nil)
end
end
@@ -1738,11 +1748,10 @@ exports.request = function(redis_params, attrs, req)
opts.dbname = redis_params.db
end
- lutil.debugm(N, 'perform generic request to redis server' ..
- ' (host=%s, timeout=%s): cmd: %s, arguments: %s', addr,
- opts.timeout, opts.cmd, opts.args)
-
if opts.callback then
+ lutil.debugm(N, 'perform generic async request to redis server' ..
+ ' (host=%s, timeout=%s): cmd: %s, arguments: %s', addr,
+ opts.timeout, opts.cmd, opts.args)
local ret, conn = rspamd_redis.make_request(opts)
if not ret then
logger.errx(log_obj, 'cannot execute redis request')
@@ -1752,6 +1761,9 @@ exports.request = function(redis_params, attrs, req)
return ret, conn, addr
else
-- Coroutines version
+ lutil.debugm(N, 'perform generic coroutine request to redis server' ..
+ ' (host=%s, timeout=%s): cmd: %s, arguments: %s', addr,
+ opts.timeout, opts.cmd, opts.args)
local ret, conn = rspamd_redis.connect_sync(opts)
if not ret then
logger.errx(log_obj, 'cannot execute redis request')
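A hedged configuration sketch (UCL) for the new `redis_version` option, placed in whichever Redis options block a module reads; on Redis 7 it lets cached read-only scripts be invoked with EVALSHA_RO.

# illustrative snippet; option names follow the schema above
servers = "127.0.0.1:6379";
redis_version = 7;   # >= 7 enables EVALSHA_RO for read-only script calls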
diff --git a/lualib/lua_scanners/cloudmark.lua b/lualib/lua_scanners/cloudmark.lua
index cb55a3bbf..12a60abf1 100644
--- a/lualib/lua_scanners/cloudmark.lua
+++ b/lualib/lua_scanners/cloudmark.lua
@@ -173,53 +173,6 @@ local function cloudmark_config(opts)
return nil
end
--- Converts a key-value map to the table representing multipart body, with the following values:
--- `data`: data of the part
--- `filename`: optional filename
--- `content-type`: content type of the element (optional)
--- `content-transfer-encoding`: optional CTE header
-local function table_to_multipart_body(tbl, boundary)
- local seen_data = false
- local out = {}
-
- for k, v in pairs(tbl) do
- if v.data then
- seen_data = true
- table.insert(out, string.format('--%s\r\n', boundary))
- if v.filename then
- table.insert(out,
- string.format('Content-Disposition: form-data; name="%s"; filename="%s"\r\n',
- k, v.filename))
- else
- table.insert(out,
- string.format('Content-Disposition: form-data; name="%s"\r\n', k))
- end
- if v['content-type'] then
- table.insert(out,
- string.format('Content-Type: %s\r\n', v['content-type']))
- else
- table.insert(out, 'Content-Type: text/plain\r\n')
- end
- if v['content-transfer-encoding'] then
- table.insert(out,
- string.format('Content-Transfer-Encoding: %s\r\n',
- v['content-transfer-encoding']))
- else
- table.insert(out, 'Content-Transfer-Encoding: binary\r\n')
- end
- table.insert(out, '\r\n')
- table.insert(out, v.data)
- table.insert(out, '\r\n')
- end
- end
-
- if seen_data then
- table.insert(out, string.format('--%s--\r\n', boundary))
- end
-
- return out
-end
-
local function get_specific_symbol(scores_symbols, score)
local selected
local sel_thr = -1
@@ -263,7 +216,8 @@ local function parse_cloudmark_reply(task, rule, body)
if obj.analysis then
-- Report analysis string
- rspamd_logger.infox(task, 'cloudmark report string: %s', obj.analysis)
+ local qid = task:get_queue_id() or 'unknown'
+ rspamd_logger.infox(task, 'qid: <%s>, cloudmark report string: %s', qid, obj.analysis)
end
local score = tonumber(obj.score) or 0
@@ -345,7 +299,9 @@ local function cloudmark_check(task, content, digest, rule, maybe_part)
local fip = task:get_from_ip()
if fip and fip:is_valid() then
- request['connIp'] = tostring(fip)
+ request['connIp'] = {
+ data = tostring(fip)
+ }
end
local hostname = task:get_hostname()
@@ -356,7 +312,7 @@ local function cloudmark_check(task, content, digest, rule, maybe_part)
local request_data = {
task = task,
url = url,
- body = table_to_multipart_body(request, static_boundary),
+ body = lua_util.table_to_multipart_body(request, static_boundary),
headers = {
['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary)
},
diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua
index 62b38c87e..636212b1f 100644
--- a/lualib/lua_util.lua
+++ b/lualib/lua_util.lua
@@ -1805,4 +1805,55 @@ exports.symbols_priorities = {
low = 0,
}
+---[[[
+-- @function lua_util.table_to_multipart_body(tbl, boundary)
+-- Converts a key-value map to the table representing multipart body, with the following values:
+-- `data`: data of the part
+-- `filename`: optional filename
+-- `content-type`: content type of the element (optional)
+-- `content-transfer-encoding`: optional CTE header
+local function table_to_multipart_body(tbl, boundary)
+ local seen_data = false
+ local out = {}
+
+ for k, v in pairs(tbl) do
+ if v.data then
+ seen_data = true
+ table.insert(out, string.format('--%s\r\n', boundary))
+ if v.filename then
+ table.insert(out,
+ string.format('Content-Disposition: form-data; name="%s"; filename="%s"\r\n',
+ k, v.filename))
+ else
+ table.insert(out,
+ string.format('Content-Disposition: form-data; name="%s"\r\n', k))
+ end
+ if v['content-type'] then
+ table.insert(out,
+ string.format('Content-Type: %s\r\n', v['content-type']))
+ else
+ table.insert(out, 'Content-Type: text/plain\r\n')
+ end
+ if v['content-transfer-encoding'] then
+ table.insert(out,
+ string.format('Content-Transfer-Encoding: %s\r\n',
+ v['content-transfer-encoding']))
+ else
+ table.insert(out, 'Content-Transfer-Encoding: binary\r\n')
+ end
+ table.insert(out, '\r\n')
+ table.insert(out, v.data)
+ table.insert(out, '\r\n')
+ end
+ end
+
+ if seen_data then
+ table.insert(out, string.format('--%s--\r\n', boundary))
+ end
+
+ return out
+end
+
+exports.table_to_multipart_body = table_to_multipart_body
+
return exports
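A short usage sketch of the newly exported helper, mirroring the cloudmark call site above; part names and payload are illustrative.

local lua_util = require "lua_util"

local body = lua_util.table_to_multipart_body({
  connIp = { data = '192.0.2.1' },   -- simple text/plain part
  message = {
    data = task:get_content(),       -- illustrative payload
    filename = 'message.eml',
    ['content-type'] = 'message/rfc822',
    ['content-transfer-encoding'] = 'binary',
  },
}, 'static-boundary-0001')
-- 'body' is a flat array of strings usable as an rspamd_http request body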
diff --git a/lualib/plugins/neural.lua b/lualib/plugins/neural.lua
index 6e88ef21c..545214669 100644
--- a/lualib/plugins/neural.lua
+++ b/lualib/plugins/neural.lua
@@ -757,7 +757,7 @@ local function process_rules_settings()
type = 'set',
})
lua_redis.register_prefix(selt.prefix .. '_\\d+_ham_set', N,
- string.format('NN learning set (spam) for rule "%s"; settings id "%s"',
+ string.format('NN learning set (ham) for rule "%s"; settings id "%s"',
rule.prefix, selt.name), {
persistent = true,
type = 'set',
diff --git a/lualib/plugins/rbl.lua b/lualib/plugins/rbl.lua
index af5d6bd91..074fc7f0c 100644
--- a/lualib/plugins/rbl.lua
+++ b/lualib/plugins/rbl.lua
@@ -32,6 +32,7 @@ local check_types = {
content_urls = {},
numeric_urls = {},
emails = {},
+ images = {},
replyto = {},
dkim = {},
rdns = {
@@ -165,8 +166,6 @@ local function convert_checks(rule, name)
end
end
- rule[check] = check_type
-
if not check_type.connfilter then
all_connfilter = false
end
@@ -176,6 +175,8 @@ local function convert_checks(rule, name)
name, check)
return nil
end
+
+ rule[check] = true
else
rspamd_logger.infox(rspamd_config, 'disable check %s in %s: excluded explicitly',
check, name)
diff --git a/lualib/redis_scripts/bayes_cache_learn.lua b/lualib/redis_scripts/bayes_cache_learn.lua
index d8a2d878e..7d44a73ef 100644
--- a/lualib/redis_scripts/bayes_cache_learn.lua
+++ b/lualib/redis_scripts/bayes_cache_learn.lua
@@ -1,7 +1,7 @@
-- Lua script to perform cache checking for bayes classification
-- This script accepts the following parameters:
-- key1 - cache id
--- key3 - is spam (1 or 0)
+-- key2 - is spam (1 or 0)
-- key3 - configuration table in message pack
local cache_id = KEYS[1]
diff --git a/lualib/redis_scripts/neural_save_unlock.lua b/lualib/redis_scripts/neural_save_unlock.lua
index 5af1ddcde..7ea7dc2e5 100644
--- a/lualib/redis_scripts/neural_save_unlock.lua
+++ b/lualib/redis_scripts/neural_save_unlock.lua
@@ -12,13 +12,14 @@
local now = tonumber(KEYS[6])
redis.call('ZADD', KEYS[2], now, KEYS[4])
redis.call('HSET', KEYS[1], 'ann', KEYS[3])
-redis.call('DEL', KEYS[1] .. '_spam_set')
-redis.call('DEL', KEYS[1] .. '_ham_set')
-redis.call('HDEL', KEYS[1], 'lock')
-redis.call('HDEL', KEYS[7], 'lock')
-redis.call('EXPIRE', KEYS[1], tonumber(KEYS[5]))
redis.call('HSET', KEYS[1], 'roc_thresholds', KEYS[8])
if KEYS[9] then
redis.call('HSET', KEYS[1], 'pca', KEYS[9])
end
-return 1
\ No newline at end of file
+redis.call('HDEL', KEYS[1], 'lock')
+redis.call('HDEL', KEYS[7], 'lock')
+redis.call('EXPIRE', KEYS[1], tonumber(KEYS[5]))
+-- expire in 10m to avoid a race condition where other rspamd replicas refill the deleted keys
+redis.call('EXPIRE', KEYS[7] .. '_spam_set', 600)
+redis.call('EXPIRE', KEYS[7] .. '_ham_set', 600)
+return 1
diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua
index f8c7fc4f7..a20e47e23 100644
--- a/lualib/rspamadm/mime.lua
+++ b/lualib/rspamadm/mime.lua
@@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-]]--
+]] --
local argparse = require "argparse"
local ansicolors = require "ansicolors"
@@ -35,94 +35,94 @@ local parser = argparse()
:require_command(true)
parser:option "-c --config"
- :description "Path to config file"
- :argname("<cfg>")
- :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf")
+ :description "Path to config file"
+ :argname("<cfg>")
+ :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf")
parser:mutex(
- parser:flag "-j --json"
- :description "JSON output",
- parser:flag "-U --ucl"
- :description "UCL output",
- parser:flag "-M --messagepack"
- :description "MessagePack output"
+ parser:flag "-j --json"
+ :description "JSON output",
+ parser:flag "-U --ucl"
+ :description "UCL output",
+ parser:flag "-M --messagepack"
+ :description "MessagePack output"
)
parser:flag "-C --compact"
- :description "Use compact format"
+ :description "Use compact format"
parser:flag "--no-file"
- :description "Do not print filename"
+ :description "Do not print filename"
-- Extract subcommand
local extract = parser:command "extract ex e"
- :description "Extracts data from MIME messages"
+ :description "Extracts data from MIME messages"
extract:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
extract:flag "-t --text"
- :description "Extracts plain text data from a message"
+ :description "Extracts plain text data from a message"
extract:flag "-H --html"
- :description "Extracts htm data from a message"
+ :description "Extracts htm data from a message"
extract:option "-o --output"
- :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')"
- :argname("<type>")
- :convert {
- raw = "raw",
- content = "content",
- oneline = "content_oneline",
- decoded = "raw_parsed",
- decoded_utf = "raw_utf"
-}
- :default "content"
+ :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')"
+ :argname("<type>")
+ :convert {
+ raw = "raw",
+ content = "content",
+ oneline = "content_oneline",
+ decoded = "raw_parsed",
+ decoded_utf = "raw_utf"
+ }
+ :default "content"
extract:flag "-w --words"
- :description "Extracts words"
+ :description "Extracts words"
extract:flag "-p --part"
- :description "Show part info"
+ :description "Show part info"
extract:flag "-s --structure"
- :description "Show structure info (e.g. HTML tags)"
+ :description "Show structure info (e.g. HTML tags)"
extract:flag "-i --invisible"
- :description "Show invisible content for HTML parts"
+ :description "Show invisible content for HTML parts"
extract:option "-F --words-format"
- :description "Words format ('stem', 'norm', 'raw', 'full')"
- :argname("<type>")
- :convert {
- stem = "stem",
- norm = "norm",
- raw = "raw",
- full = "full",
-}
- :default "stem"
+ :description "Words format ('stem', 'norm', 'raw', 'full')"
+ :argname("<type>")
+ :convert {
+ stem = "stem",
+ norm = "norm",
+ raw = "raw",
+ full = "full",
+ }
+ :default "stem"
local stat = parser:command "stat st s"
- :description "Extracts statistical data from MIME messages"
+ :description "Extracts statistical data from MIME messages"
stat:argument "file"
:description "File to process"
:argname "<file>"
:args "+"
stat:mutex(
- stat:flag "-m --meta"
- :description "Lua metatokens",
- stat:flag "-b --bayes"
- :description "Bayes tokens",
- stat:flag "-F --fuzzy"
- :description "Fuzzy hashes"
+ stat:flag "-m --meta"
+ :description "Lua metatokens",
+ stat:flag "-b --bayes"
+ :description "Bayes tokens",
+ stat:flag "-F --fuzzy"
+ :description "Fuzzy hashes"
)
stat:flag "-s --shingles"
:description "Show shingles for fuzzy hashes"
local urls = parser:command "urls url u"
- :description "Extracts URLs from MIME messages"
+ :description "Extracts URLs from MIME messages"
urls:argument "file"
:description "File to process"
:argname "<file>"
:args "+"
urls:mutex(
- urls:flag "-t --tld"
- :description "Get TLDs only",
- urls:flag "-H --host"
- :description "Get hosts only",
- urls:flag "-f --full"
- :description "Show piecewise urls as processed by Rspamd"
+ urls:flag "-t --tld"
+ :description "Get TLDs only",
+ urls:flag "-H --host"
+ :description "Get hosts only",
+ urls:flag "-f --full"
+ :description "Show piecewise urls as processed by Rspamd"
)
urls:flag "-u --unique"
@@ -135,67 +135,75 @@ urls:flag "-r --reverse"
:description "Reverse sort order"
local modify = parser:command "modify mod m"
- :description "Modifies MIME message"
+ :description "Modifies MIME message"
modify:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
modify:option "-a --add-header"
- :description "Adds specific header"
- :argname "<header=value>"
- :count "*"
+ :description "Adds specific header"
+ :argname "<header=value>"
+ :count "*"
modify:option "-r --remove-header"
- :description "Removes specific header (all occurrences)"
- :argname "<header>"
- :count "*"
+ :description "Removes specific header (all occurrences)"
+ :argname "<header>"
+ :count "*"
modify:option "-R --rewrite-header"
- :description "Rewrites specific header, uses Lua string.format pattern"
- :argname "<header=pattern>"
- :count "*"
+ :description "Rewrites specific header, uses Lua string.format pattern"
+ :argname "<header=pattern>"
+ :count "*"
modify:option "-t --text-footer"
- :description "Adds footer to text/plain parts from a specific file"
- :argname "<file>"
+ :description "Adds footer to text/plain parts from a specific file"
+ :argname "<file>"
modify:option "-H --html-footer"
- :description "Adds footer to text/html parts from a specific file"
- :argname "<file>"
+ :description "Adds footer to text/html parts from a specific file"
+ :argname "<file>"
local strip = parser:command "strip"
- :description "Strip attachments from a message"
+ :description "Strip attachments from a message"
strip:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
strip:flag "-i --keep-images"
- :description "Keep images"
+ :description "Keep images"
strip:option "--min-text-size"
- :description "Minimal text size to keep"
- :argname "<size>"
- :convert(tonumber)
- :default(0)
+ :description "Minimal text size to keep"
+ :argname "<size>"
+ :convert(tonumber)
+ :default(0)
strip:option "--max-text-size"
- :description "Max text size to keep"
- :argname "<size>"
- :convert(tonumber)
- :default(math.huge)
+ :description "Max text size to keep"
+ :argname "<size>"
+ :convert(tonumber)
+ :default(math.huge)
local anonymize = parser:command "anonymize"
- :description "Try to remove sensitive information from a message"
+ :description "Try to remove sensitive information from a message"
anonymize:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
anonymize:option "--exclude-header -X"
- :description "Exclude specific headers from anonymization"
- :argname "<header>"
- :count "*"
+ :description "Exclude specific headers from anonymization"
+ :argname "<header>"
+ :count "*"
anonymize:option "--include-header -I"
- :description "Include specific headers from anonymization"
- :argname "<header>"
- :count "*"
+    :description "Include specific headers in anonymization"
+ :argname "<header>"
+ :count "*"
+anonymize:flag "--gpt"
+  :description "Use an LLM for anonymization (requires the GPT plugin to be configured)"
+anonymize:option "--model"
+ :description "Model to use for anonymization"
+ :argname "<model>"
+anonymize:option "--prompt"
+ :description "Prompt to use for anonymization"
+ :argname "<prompt>"
local sign = parser:command "sign"
- :description "Performs DKIM signing"
+ :description "Performs DKIM signing"
sign:argument "file"
:description "File to process"
:argname "<file>"
@@ -217,33 +225,33 @@ sign:option "-t --type"
:description "ARC or DKIM signing"
:argname("<arc|dkim>")
:convert {
- ['arc'] = 'arc',
- ['dkim'] = 'dkim',
-}
+ ['arc'] = 'arc',
+ ['dkim'] = 'dkim',
+ }
:default 'dkim'
sign:option "-o --output"
:description "Output format"
:argname("<message|signature>")
:convert {
- ['message'] = 'message',
- ['signature'] = 'signature',
-}
+ ['message'] = 'message',
+ ['signature'] = 'signature',
+ }
:default 'message'
local dump = parser:command "dump"
- :description "Dumps a raw message in different formats"
+ :description "Dumps a raw message in different formats"
dump:argument "file"
:description "File to process"
:argname "<file>"
:args "+"
-- Duplicate format for convenience
dump:mutex(
- parser:flag "-j --json"
- :description "JSON output",
- parser:flag "-U --ucl"
- :description "UCL output",
- parser:flag "-M --messagepack"
- :description "MessagePack output"
+ parser:flag "-j --json"
+ :description "JSON output",
+ parser:flag "-U --ucl"
+ :description "UCL output",
+ parser:flag "-M --messagepack"
+ :description "MessagePack output"
)
dump:flag "-s --split"
:description "Split the output file contents such that no content is embedded"
@@ -252,7 +260,7 @@ dump:option "-o --outdir"
:description "Output directory"
:argname("<directory>")
-local function load_config(opts)
+local function load_config(opts, load_tokenizers)
local _r, err = rspamd_config:load_ucl(opts['config'])
if not _r then
@@ -265,23 +273,46 @@ local function load_config(opts)
rspamd_logger.errx('cannot process %s: %s', opts['config'], err)
os.exit(1)
end
+
+ -- Load custom tokenizers if requested
+ if load_tokenizers then
+ local success, tokenizer_err = rspamd_config:load_custom_tokenizers()
+ if not success then
+ rspamd_logger.errx('cannot load custom tokenizers: %s', tokenizer_err or 'unknown error')
+ -- Don't exit here as custom tokenizers are optional
+ rspamd_logger.warnx('proceeding without custom tokenizers')
+ end
+ end
+end
+
+-- Helper function to ensure proper cleanup of tokenizers
+local function cleanup_tokenizers()
+ if rspamd_config then
+ rspamd_config:unload_custom_tokenizers()
+ end
end
-local function load_task(opts, fname)
+local function load_task(_, fname)
if not fname then
fname = '-'
end
- local res, task = rspamd_task.load_from_file(fname, rspamd_config)
+ local task = rspamd_task.create(rspamd_config, rspamadm_ev_base)
+ task:set_session(rspamadm_session)
+ task:set_resolver(rspamadm_dns_resolver)
+
+ local res = task:load_from_file(fname)
if not res then
parser:error(string.format('cannot read message from %s: %s', fname,
- task))
+ task))
+ return nil
end
if not task:process_message() then
parser:error(string.format('cannot read message from %s: %s', fname,
- 'failed to parse'))
+ 'failed to parse'))
+ return nil
end
return task
@@ -321,7 +352,6 @@ local function print_elts(elts, opts, func)
io.write(ucl.to_format(elts, output_fmt(opts)))
else
fun.each(function(fname, elt)
-
if not opts.json and not opts.ucl then
if func then
elt = fun.map(func, elt)
@@ -343,7 +373,7 @@ local function extract_handler(opts)
if opts.words then
-- Enable stemming and urls detection
- load_config(opts)
+ load_config(opts, true) -- Load with custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
rspamd_config:init_subsystem('langdet')
end
@@ -358,39 +388,38 @@ local function extract_handler(opts)
if not opts.json and not opts.ucl then
table.insert(out,
- rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s',
- part:get_mimepart():get_digest():sub(1, 8),
- t,
- part:get_language(),
- part:get_length(), part:get_raw_length(),
- part:get_words_count()))
+ rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s',
+ part:get_mimepart():get_digest():sub(1, 8),
+ t,
+ part:get_language(),
+ part:get_length(), part:get_raw_length(),
+ part:get_words_count()))
table.insert(out,
- rspamd_logger.slog('Stats: %s',
- fun.foldl(function(acc, k, v)
- if acc ~= '' then
- return string.format('%s, %s:%s', acc, k, v)
- else
- return string.format('%s:%s', k, v)
- end
- end, '', part:get_stats())))
+ rspamd_logger.slog('Stats: %s',
+ fun.foldl(function(acc, k, v)
+ if acc ~= '' then
+ return string.format('%s, %s:%s', acc, k, v)
+ else
+ return string.format('%s:%s', k, v)
+ end
+ end, '', part:get_stats())))
end
end
end
local function maybe_print_mime_part_info(part, out)
if opts.part then
-
if not opts.json and not opts.ucl then
local mtype, msubtype = part:get_type()
local det_mtype, det_msubtype = part:get_detected_type()
table.insert(out,
- rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s',
- part:get_digest():sub(1, 8),
- mtype, msubtype,
- det_mtype, det_msubtype,
- part:get_filename(),
- part:get_detected_ext(),
- part:get_length()))
+ rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s',
+ part:get_digest():sub(1, 8),
+ mtype, msubtype,
+ det_mtype, det_msubtype,
+ part:get_filename(),
+ part:get_detected_ext(),
+ part:get_length()))
end
end
end
@@ -402,17 +431,17 @@ local function extract_handler(opts)
return table.concat(words, ' ')
else
return table.concat(
- fun.totable(
- fun.map(function(w)
- -- [1] - stemmed word
- -- [2] - normalised word
- -- [3] - raw word
- -- [4] - flags (table of strings)
- return string.format('%s|%s|%s(%s)',
- w[3], w[2], w[1], table.concat(w[4], ','))
- end, words)
- ),
- ' '
+ fun.totable(
+ fun.map(function(w)
+ -- [1] - stemmed word
+ -- [2] - normalised word
+ -- [3] - raw word
+ -- [4] - flags (table of strings)
+ return string.format('%s|%s|%s(%s)',
+ w[3], w[2], w[1], table.concat(w[4], ','))
+ end, words)
+ ),
+ ' '
)
end
end
@@ -429,7 +458,7 @@ local function extract_handler(opts)
if opts.words then
local how_words = opts['words_format'] or 'stem'
table.insert(out_elts[fname], 'meta_words: ' ..
- print_words(task:get_meta_words(how_words), how_words == 'full'))
+ print_words(task:get_meta_words(how_words), how_words == 'full'))
end
if opts.text or opts.html then
@@ -452,7 +481,7 @@ local function extract_handler(opts)
if opts.words then
local how_words = opts['words_format'] or 'stem'
table.insert(out_elts[fname], print_words(part:get_words(how_words),
- how_words == 'full'))
+ how_words == 'full'))
else
table.insert(out_elts[fname], tostring(part:get_content(how)))
end
@@ -466,7 +495,7 @@ local function extract_handler(opts)
if opts.words then
local how_words = opts['words_format'] or 'stem'
table.insert(out_elts[fname], print_words(part:get_words(how_words),
- how_words == 'full'))
+ how_words == 'full'))
else
if opts.structure then
local hc = part:get_html()
@@ -475,11 +504,11 @@ local function extract_handler(opts)
local fun = require "fun"
if type(elt) == 'table' then
return table.concat(fun.totable(
- fun.map(
- function(t)
- return rspamd_logger.slog("%s", t)
- end,
- elt)), '\n')
+ fun.map(
+ function(t)
+ return rspamd_logger.slog("%s", t)
+ end,
+ elt)), '\n')
else
return rspamd_logger.slog("%s", elt)
end
@@ -510,7 +539,7 @@ local function extract_handler(opts)
if opts.invisible then
local hc = part:get_html()
table.insert(out_elts[fname], string.format('invisible content: %s',
- tostring(hc:get_invisible())))
+ tostring(hc:get_invisible())))
end
end
end
@@ -530,13 +559,18 @@ local function extract_handler(opts)
for _, task in ipairs(tasks) do
task:destroy()
end
+
+ -- Cleanup custom tokenizers if they were loaded
+ if opts.words then
+ cleanup_tokenizers()
+ end
end
local function stat_handler(opts)
local fun = require "fun"
local out_elts = {}
- load_config(opts)
+ load_config(opts, true) -- Load with custom tokenizers for stat generation
rspamd_url.init(rspamd_config:get_tld_path())
rspamd_config:init_subsystem('langdet,stat') -- Needed to gen stat tokens
@@ -557,10 +591,10 @@ local function stat_handler(opts)
out_elts[fname] = bt
process_func = function(e)
return string.format('%s (%d): "%s"+"%s", [%s]', e.data, e.win, e.t1 or "",
- e.t2 or "", table.concat(fun.totable(
- fun.map(function(k)
- return k
- end, e.flags)), ","))
+ e.t2 or "", table.concat(fun.totable(
+ fun.map(function(k)
+ return k
+ end, e.flags)), ","))
end
elseif opts.fuzzy then
local parts = task:get_parts() or {}
@@ -587,16 +621,16 @@ local function stat_handler(opts)
digest = digest,
shingles = shingles,
type = string.format('%s/%s',
- ({ part:get_type() })[1],
- ({ part:get_type() })[2])
+ ({ part:get_type() })[1],
+ ({ part:get_type() })[2])
})
else
table.insert(out_elts[fname], {
digest = part:get_digest(),
file = part:get_filename(),
type = string.format('%s/%s',
- ({ part:get_type() })[1],
- ({ part:get_type() })[2])
+ ({ part:get_type() })[1],
+ ({ part:get_type() })[2])
})
end
end
@@ -607,10 +641,13 @@ local function stat_handler(opts)
end
print_elts(out_elts, opts, process_func)
+
+ -- Cleanup custom tokenizers
+ cleanup_tokenizers()
end
local function urls_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- URLs don't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
local out_elts = {}
@@ -750,7 +787,7 @@ local function newline(task)
end
local function modify_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Modification doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
local function read_file(file)
@@ -790,10 +827,10 @@ local function modify_handler(opts)
if hname == name then
local new_value = string.format(hpattern, hdr.decoded)
new_value = string.format('%s:%s%s',
- name, hdr.separator,
- rspamd_util.fold_header(name,
- rspamd_util.mime_header_encode(new_value),
- task:get_newlines_type()))
+ name, hdr.separator,
+ rspamd_util.fold_header(name,
+ rspamd_util.mime_header_encode(new_value),
+ task:get_newlines_type()))
out[#out + 1] = new_value
return
end
@@ -802,12 +839,12 @@ local function modify_handler(opts)
if rewrite.need_rewrite_ct then
if name:lower() == 'content-type' then
local nct = string.format('%s: %s/%s; charset=utf-8',
- 'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype)
+ 'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype)
out[#out + 1] = nct
return
elseif name:lower() == 'content-transfer-encoding' then
out[#out + 1] = string.format('%s: %s',
- 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
+ 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
seen_cte = true
return
end
@@ -823,13 +860,13 @@ local function modify_handler(opts)
if hname and hvalue then
out[#out + 1] = string.format('%s: %s', hname,
- rspamd_util.fold_header(hname, hvalue, task:get_newlines_type()))
+ rspamd_util.fold_header(hname, hvalue, task:get_newlines_type()))
end
end
if not seen_cte and rewrite.need_rewrite_ct then
out[#out + 1] = string.format('%s: %s',
- 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
+ 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
end
-- End of headers
@@ -869,7 +906,7 @@ local function modify_handler(opts)
end
local function sign_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Signing doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
local lua_dkim = require("lua_ffi").dkim
@@ -913,11 +950,11 @@ local function sign_handler(opts)
io.flush()
else
local dkim_hdr = string.format('%s: %s%s',
- 'DKIM-Signature',
- rspamd_util.fold_header('DKIM-Signature',
- rspamd_util.mime_header_encode(sig),
- task:get_newlines_type()),
- newline(task))
+ 'DKIM-Signature',
+ rspamd_util.fold_header('DKIM-Signature',
+ rspamd_util.mime_header_encode(sig),
+ task:get_newlines_type()),
+ newline(task))
io.write(dkim_hdr)
io.flush()
task:get_content():save_in_file(1)
@@ -928,7 +965,7 @@ local function sign_handler(opts)
end
local function strip_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Stripping doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
for _, fname in ipairs(opts.file) do
@@ -984,7 +1021,7 @@ local function strip_handler(opts)
end
local function anonymize_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Anonymization doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
for _, fname in ipairs(opts.file) do
@@ -1089,7 +1126,7 @@ local function get_dump_content(task, opts, fname)
end
local function dump_handler(opts)
- load_config(opts)
+ load_config(opts, false) -- Dumping doesn't need custom tokenizers
rspamd_url.init(rspamd_config:get_tld_path())
for _, fname in ipairs(opts.file) do
diff --git a/lualib/rspamadm/statistics_dump.lua b/lualib/rspamadm/statistics_dump.lua
index 6bc045850..6a08d11fd 100644
--- a/lualib/rspamadm/statistics_dump.lua
+++ b/lualib/rspamadm/statistics_dump.lua
@@ -42,6 +42,12 @@ parser:option "-c --config"
:argname("<cfg>")
:default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf")
+parser:option "-b --batch-size"
+ :description "Number of entries to process at once"
+ :argname("<elts>")
+ :convert(tonumber)
+ :default(1000)
+
-- Extract subcommand
local dump = parser:command "dump d"
:description "Dump bayes statistics"
@@ -54,7 +60,7 @@ dump:mutex(
dump:flag "-c --compress"
:description "Compress output"
dump:option "-b --batch-size"
- :description "Number of entires to process at once"
+ :description "Number of entries to process at once"
:argname("<elts>")
:convert(tonumber)
:default(1000)
@@ -68,12 +74,12 @@ restore:argument "file"
:argname "<file>"
:args "*"
restore:option "-b --batch-size"
- :description "Number of entires to process at once"
+ :description "Number of entries to process at once"
:argname("<elts>")
:convert(tonumber)
:default(1000)
restore:option "-m --mode"
- :description "Number of entires to process at once"
+  :description "Restore mode (append, subtract or replace)"
:argname("<append|subtract|replace>")
:convert {
['append'] = 'append',
@@ -287,11 +293,11 @@ local function dump_pattern(conn, pattern, opts, out, key)
-- Do not write the last chunk of out as it will be processed afterwards
if cursor ~= 0 then
if opts.cdb then
- dump_out(out, opts, false)
- clear_fcn(out)
- else
dump_cdb(out, opts, false, key)
out[key].elts = {}
+ else
+ dump_out(out, opts, false)
+ clear_fcn(out)
end
elseif opts.cdb then
dump_cdb(out, opts, true, key)
@@ -541,4 +547,4 @@ return {
aliases = { 'stat_dump', 'bayes_dump' },
handler = handler,
description = parser._description
-} \ No newline at end of file
+}
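
A minimal sketch of what the new global --batch-size option is meant to bound: how many keys each iteration requests from Redis. scan_step below is a hypothetical helper standing in for the module's actual Redis calls, so this is an assumption about usage rather than the module's implementation:

local function iterate_keys(opts, pattern)
  local cursor = 0
  repeat
    -- scan_step is hypothetical; it would issue something like
    -- SCAN <cursor> MATCH <pattern> COUNT <opts.batch_size>
    local keys
    cursor, keys = scan_step(cursor, pattern, opts.batch_size)
    for _, key in ipairs(keys) do
      -- accumulate or dump the key here
    end
  until cursor == 0
end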