about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rspamd.com> 2024-12-12 17:54:01 +0600
committer GitHub <noreply@github.com> 2024-12-12 17:54:01 +0600
commit 1e63b18528f33f89757709e1851fa3499e512e96 (patch)
tree e45d5b884ed23f7fb303743171edc8ef99ca41ff
parent 7e6bec3c411cf368f37c1701141a5f65895b741a (diff)
parent 3197ce7e6ad44cd4afc0f0013dfc03f8a4deeec8 (diff)
download rspamd-1e63b18528f33f89757709e1851fa3499e512e96.tar.gz
rspamd-1e63b18528f33f89757709e1851fa3499e512e96.zip
Merge pull request #5254 from rspamd/vstakhov-phishing-fixes
Fix phishing symbol for the same domains
-rw-r--r-- src/lua/lua_util.c 13
-rw-r--r-- src/plugins/lua/antivirus.lua 4
-rw-r--r-- src/plugins/lua/arc.lua 6
-rw-r--r-- src/plugins/lua/clickhouse.lua 4
-rw-r--r-- src/plugins/lua/clustering.lua 10
-rw-r--r-- src/plugins/lua/elastic.lua 172
-rw-r--r-- src/plugins/lua/external_services.lua 4
-rw-r--r-- src/plugins/lua/neural.lua 10
-rw-r--r-- src/plugins/lua/p0f.lua 2
-rw-r--r-- src/plugins/lua/phishing.lua 50
-rw-r--r-- src/plugins/lua/reputation.lua 4
-rw-r--r-- src/plugins/lua/url_redirector.lua 10
12 files changed, 164 insertions, 125 deletions
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 14994751c..ce4d9f67c 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -2152,6 +2152,17 @@ lua_util_is_utf_spoofed(lua_State *L)
return 1;
}
+
+ /* Disable single script confusables, as it is not what we want to check */
+ uspoof_setChecks(spc,
+ USPOOF_CONFUSABLE & ~USPOOF_SINGLE_SCRIPT_CONFUSABLE,
+ &uc_err);
+ if (uc_err != U_ZERO_ERROR) {
+ msg_err("Cannot set proper checks for uspoof: %s", u_errorName(uc_err));
+ lua_pushboolean(L, false);
+ uspoof_close(spc);
+ return 1;
+ }
}
ret = uspoof_areConfusableUTF8(spc, s1, l1, s2, l2, &uc_err);
@@ -2174,7 +2185,7 @@ lua_util_is_utf_spoofed(lua_State *L)
if (uc_err != U_ZERO_ERROR) {
msg_err("Cannot set proper checks for uspoof: %s", u_errorName(uc_err));
lua_pushboolean(L, false);
- uspoof_close(spc);
+ uspoof_close(spc_sgl);
return 1;
}
}
diff --git a/src/plugins/lua/antivirus.lua b/src/plugins/lua/antivirus.lua
index e39ddc5ba..5337f6666 100644
--- a/src/plugins/lua/antivirus.lua
+++ b/src/plugins/lua/antivirus.lua
@@ -258,7 +258,7 @@ if opts and type(opts) == 'table' then
for _, p in ipairs(m['patterns']) do
if type(p) == 'table' then
for sym in pairs(p) do
- rspamd_logger.debugm(N, rspamd_config, 'registering: %1', {
+ lua_util.debugm(N, rspamd_config, 'registering: %1', {
type = 'virtual',
name = sym,
parent = m['symbol'],
@@ -292,7 +292,7 @@ if opts and type(opts) == 'table' then
for _, p in ipairs(m['patterns_fail']) do
if type(p) == 'table' then
for sym in pairs(p) do
- rspamd_logger.debugm(N, rspamd_config, 'registering: %1', {
+ lua_util.debugm(N, rspamd_config, 'registering: %1', {
type = 'virtual',
name = sym,
parent = m['symbol'],
diff --git a/src/plugins/lua/arc.lua b/src/plugins/lua/arc.lua
index 90e254e78..fb5dd93e6 100644
--- a/src/plugins/lua/arc.lua
+++ b/src/plugins/lua/arc.lua
@@ -517,10 +517,10 @@ local function arc_sign_seal(task, params, header)
local ar_header = task:get_header('Authentication-Results')
if ar_header then
- rspamd_logger.debugm(N, task, 'reuse authentication results header for ARC')
+ lua_util.debugm(N, task, 'reuse authentication results header for ARC')
cur_auth_results = ar_header
else
- rspamd_logger.debugm(N, task, 'cannot reuse authentication results, header is missing')
+ lua_util.debugm(N, task, 'cannot reuse authentication results, header is missing')
cur_auth_results = lua_auth_results.gen_auth_results(task, ar_settings) or ''
end
else
@@ -639,7 +639,7 @@ local function prepare_arc_selector(task, sel)
ar_header = ar_header or ""
for k, v in string.gmatch(ar_header, "(%w+)=(%w+)") do
if k == 'arc' then
- return v
+ return v
end
end
return nil
diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua
index 25eabc760..16a8ad4ec 100644
--- a/src/plugins/lua/clickhouse.lua
+++ b/src/plugins/lua/clickhouse.lua
@@ -1146,7 +1146,7 @@ local function upload_clickhouse_schema(upstream, ev_base, cfg, initial)
errored = true
return
end
- rspamd_logger.debugm(N, rspamd_config, 'uploaded clickhouse schema element %s to %s: %s',
+ lua_util.debugm(N, rspamd_config, 'uploaded clickhouse schema element %s to %s: %s',
v, upstream:get_addr():to_string(true), reply)
end
@@ -1159,7 +1159,7 @@ local function upload_clickhouse_schema(upstream, ev_base, cfg, initial)
if initial == v[2] then
return lua_util.template(v[1], { SCHEMA_VERSION = tostring(schema_version) })
else
- rspamd_logger.debugm(N, rspamd_config, 'skip clickhouse schema element %s: schema already exists',
+ lua_util.debugm(N, rspamd_config, 'skip clickhouse schema element %s: schema already exists',
v)
end
end
diff --git a/src/plugins/lua/clustering.lua b/src/plugins/lua/clustering.lua
index d97bdb97e..a00ea9f43 100644
--- a/src/plugins/lua/clustering.lua
+++ b/src/plugins/lua/clustering.lua
@@ -135,7 +135,7 @@ local function clusterting_filter_cb(task, rule)
end
if not cluster_selector or not source_selector then
- rspamd_logger.debugm(N, task, 'skip rule %s, selectors: source="%s", cluster="%s"',
+ lua_util.debugm(N, task, 'skip rule %s, selectors: source="%s", cluster="%s"',
rule.name, source_selector, cluster_selector)
return
end
@@ -153,7 +153,7 @@ local function clusterting_filter_cb(task, rule)
-- We have seen this element in ham mostly, so subtract average it from the size score
final_score = math.min(1.0, size_score - cluster_score / cur_elts)
end
- rspamd_logger.debugm(N, task,
+ lua_util.debugm(N, task,
'processed rule %s, selectors: source="%s", cluster="%s"; data: %s elts, %s score, %s elt score',
rule.name, source_selector, cluster_selector, cur_elts, total_score, element_score)
if final_score > 0.1 then
@@ -205,7 +205,7 @@ local function clusterting_idempotent_cb(task, rule)
elseif verdict == 'junk' then
score = rule.junk_mult
else
- rspamd_logger.debugm(N, task, 'skip rule %s, verdict=%s',
+ lua_util.debugm(N, task, 'skip rule %s, verdict=%s',
rule.name, verdict)
return
end
@@ -218,7 +218,7 @@ local function clusterting_idempotent_cb(task, rule)
end
if not cluster_selector or not source_selector then
- rspamd_logger.debugm(N, task, 'skip rule %s, selectors: source="%s", cluster="%s"',
+ lua_util.debugm(N, task, 'skip rule %s, selectors: source="%s", cluster="%s"',
rule.name, source_selector, cluster_selector)
return
end
@@ -228,7 +228,7 @@ local function clusterting_idempotent_cb(task, rule)
rspamd_logger.errx(task, 'got error while getting clustering keys %s: %s',
source_selector, err)
else
- rspamd_logger.debugm(N, task, 'set clustering key for %s: %s{%s} = %s',
+ lua_util.debugm(N, task, 'set clustering key for %s: %s{%s} = %s',
source_selector, "unknown error")
end
end
diff --git a/src/plugins/lua/elastic.lua b/src/plugins/lua/elastic.lua
index f3eb3cc4f..8bed9fcf4 100644
--- a/src/plugins/lua/elastic.lua
+++ b/src/plugins/lua/elastic.lua
@@ -72,7 +72,7 @@ local settings = {
enabled = true,
version = {
autodetect_enabled = true,
- autodetect_max_fail = 30,
+ autodetect_max_fail = 30,
-- override works only if autodetect is disabled
override = {
name = 'opensearch',
@@ -164,7 +164,7 @@ local Queue = {}
Queue.__index = Queue
function Queue:new()
- local obj = {first = 1, last = 0, data = {}}
+ local obj = { first = 1, last = 0, data = {} }
setmetatable(obj, self)
return obj
end
@@ -234,7 +234,7 @@ local buffer = {
}
local function contains(tbl, val)
- for i=1,#tbl do
+ for i = 1, #tbl do
if tbl[i]:lower() == val:lower() then
return true
end
@@ -244,7 +244,7 @@ end
local function safe_get(table, ...)
local value = table
- for _, key in ipairs({...}) do
+ for _, key in ipairs({ ... }) do
if value[key] == nil then
return nil
end
@@ -284,10 +284,10 @@ local function compare_versions(v1, v2)
return 0 -- versions are equal
end
-local function handle_error(action,component,limit)
+local function handle_error(action, component, limit)
if states[component]['errors'] >= limit then
rspamd_logger.errx(rspamd_config, 'cannot %s elastic %s, failed attempts: %s/%s, stop trying',
- action, component:gsub('_', ' '), states[component]['errors'], limit)
+ action, component:gsub('_', ' '), states[component]['errors'], limit)
states[component]['configured'] = true
else
states[component]['errors'] = states[component]['errors'] + 1
@@ -318,25 +318,25 @@ end
local function is_empty(str)
-- define a pattern that includes invisible unicode characters
local str_cleared = str:gsub('[' ..
- '\xC2\xA0' .. -- U+00A0 non-breaking space
- '\xE2\x80\x8B' .. -- U+200B zero width space
- '\xEF\xBB\xBF' .. -- U+FEFF byte order mark (zero width no-break space)
- '\xE2\x80\x8C' .. -- U+200C zero width non-joiner
- '\xE2\x80\x8D' .. -- U+200D zero width joiner
- '\xE2\x80\x8E' .. -- U+200E left-to-right mark
- '\xE2\x80\x8F' .. -- U+200F right-to-left mark
- '\xE2\x81\xA0' .. -- U+2060 word joiner
- '\xE2\x80\xAA' .. -- U+202A left-to-right embedding
- '\xE2\x80\xAB' .. -- U+202B right-to-left embedding
- '\xE2\x80\xAC' .. -- U+202C pop directional formatting
- '\xE2\x80\xAD' .. -- U+202D left-to-right override
- '\xE2\x80\xAE' .. -- U+202E right-to-left override
- '\xE2\x81\x9F' .. -- U+2061 function application
- '\xE2\x81\xA1' .. -- U+2061 invisible separator
- '\xE2\x81\xA2' .. -- U+2062 invisible times
- '\xE2\x81\xA3' .. -- U+2063 invisible separator
- '\xE2\x81\xA4' .. -- U+2064 invisible plus
- ']', '') -- gsub replaces all matched characters with an empty string
+ '\xC2\xA0' .. -- U+00A0 non-breaking space
+ '\xE2\x80\x8B' .. -- U+200B zero width space
+ '\xEF\xBB\xBF' .. -- U+FEFF byte order mark (zero width no-break space)
+ '\xE2\x80\x8C' .. -- U+200C zero width non-joiner
+ '\xE2\x80\x8D' .. -- U+200D zero width joiner
+ '\xE2\x80\x8E' .. -- U+200E left-to-right mark
+ '\xE2\x80\x8F' .. -- U+200F right-to-left mark
+ '\xE2\x81\xA0' .. -- U+2060 word joiner
+ '\xE2\x80\xAA' .. -- U+202A left-to-right embedding
+ '\xE2\x80\xAB' .. -- U+202B right-to-left embedding
+ '\xE2\x80\xAC' .. -- U+202C pop directional formatting
+ '\xE2\x80\xAD' .. -- U+202D left-to-right override
+ '\xE2\x80\xAE' .. -- U+202E right-to-left override
+ '\xE2\x81\x9F' .. -- U+2061 function application
+ '\xE2\x81\xA1' .. -- U+2061 invisible separator
+ '\xE2\x81\xA2' .. -- U+2062 invisible times
+ '\xE2\x81\xA3' .. -- U+2063 invisible separator
+ '\xE2\x81\xA4' .. -- U+2064 invisible plus
+ ']', '') -- gsub replaces all matched characters with an empty string
if str_cleared:match('[%S]') then
return false
else
@@ -350,7 +350,7 @@ local function fill_empty_strings(tbl, empty_value)
if value and type(value) == 'table' then
local nested_filtered = fill_empty_strings(value, empty_value)
if next(nested_filtered) ~= nil then
- filled_tbl[key] = nested_filtered
+ filled_tbl[key] = nested_filtered
end
elseif type(value) == 'boolean' then
filled_tbl[key] = value
@@ -367,8 +367,8 @@ local function create_bulk_json(es_index, logs_to_send)
local tbl = {}
for _, row in pairs(logs_to_send) do
local pipeline = ''
- if settings['geoip']['enabled']then
- pipeline = ',"pipeline":"'.. settings['geoip']['pipeline_name'] .. '"'
+ if settings['geoip']['enabled'] then
+ pipeline = ',"pipeline":"' .. settings['geoip']['pipeline_name'] .. '"'
end
table.insert(tbl, '{"index":{"_index":"' .. es_index .. '"' .. pipeline .. '}}')
table.insert(tbl, ucl.to_format(row, 'json-compact'))
@@ -401,21 +401,21 @@ local function elastic_send_data(flush_all, task, cfg, ev_base)
push_url = connect_prefix .. ip_addr .. '/' .. es_index .. '/_bulk'
bulk_json = create_bulk_json(es_index, logs_to_send)
- rspamd_logger.debugm(N, log_object, 'successfully composed payload with %s log lines', nlogs_to_send)
+ lua_util.debugm(N, log_object, 'successfully composed payload with %s log lines', nlogs_to_send)
end
local function http_callback(err, code, body, _)
local push_done = false
if err then
rspamd_logger.errx(log_object, 'cannot send logs to elastic (%s): %s; failed attempts: %s/%s',
- push_url, err, buffer['errors'], settings['limits']['max_fail'])
+ push_url, err, buffer['errors'], settings['limits']['max_fail'])
elseif code == 200 then
local parser = ucl.parser()
local res, ucl_err = parser:parse_string(body)
if not ucl_err and res then
local obj = parser:get_object()
push_done = true
- rspamd_logger.debugm(N, log_object, 'successfully sent payload with %s logs', nlogs_to_send)
+ lua_util.debugm(N, log_object, 'successfully sent payload with %s logs', nlogs_to_send)
if obj['errors'] then
for _, value in pairs(obj['items']) do
if value['index'] and value['index']['status'] >= 400 then
@@ -424,20 +424,20 @@ local function elastic_send_data(flush_all, task, cfg, ev_base)
local error_type = safe_get(value, 'index', 'error', 'type') or ''
local error_reason = safe_get(value, 'index', 'error', 'reason') or ''
rspamd_logger.warnx(log_object,
- 'error while pushing logs to elastic, status: %s, index: %s, type: %s, reason: %s',
- status, index, error_type, error_reason)
+ 'error while pushing logs to elastic, status: %s, index: %s, type: %s, reason: %s',
+ status, index, error_type, error_reason)
end
end
end
else
rspamd_logger.errx(log_object,
- 'cannot parse response from elastic (%s): %s; failed attempts: %s/%s',
- push_url, ucl_err, buffer['errors'], settings['limits']['max_fail'])
+ 'cannot parse response from elastic (%s): %s; failed attempts: %s/%s',
+ push_url, ucl_err, buffer['errors'], settings['limits']['max_fail'])
end
else
rspamd_logger.errx(log_object,
- 'cannot send logs to elastic (%s) due to bad http status code: %s, response: %s; failed attempts: %s/%s',
- push_url, code, body, buffer['errors'], settings['limits']['max_fail'])
+ 'cannot send logs to elastic (%s) due to bad http status code: %s, response: %s; failed attempts: %s/%s',
+ push_url, code, body, buffer['errors'], settings['limits']['max_fail'])
end
-- proccess results
if push_done then
@@ -447,8 +447,9 @@ local function elastic_send_data(flush_all, task, cfg, ev_base)
else
upstream:fail()
if buffer['errors'] >= settings['limits']['max_fail'] then
- rspamd_logger.errx(log_object, 'failed to send %s log lines, failed attempts: %s/%s, removing failed logs from bugger',
- nlogs_to_send, buffer['errors'], settings['limits']['max_fail'])
+ rspamd_logger.errx(log_object,
+ 'failed to send %s log lines, failed attempts: %s/%s, removing failed logs from bugger',
+ nlogs_to_send, buffer['errors'], settings['limits']['max_fail'])
buffer['logs']:pop_first(nlogs_to_send)
buffer['errors'] = 0
else
@@ -466,7 +467,7 @@ local function elastic_send_data(flush_all, task, cfg, ev_base)
},
body = bulk_json,
method = 'post',
- callback=http_callback,
+ callback = http_callback,
gzip = settings.use_gzip,
keepalive = settings.use_keepalive,
no_ssl_verify = settings.no_ssl_verify,
@@ -564,8 +565,8 @@ local function get_general_metadata(task)
if task:has_from('smtp') then
local from = task:get_from({ 'smtp', 'orig' })[1]
if from and
- from['user'] and #from['user'] > 0 and
- from['domain'] and #from['domain'] > 0
+ from['user'] and #from['user'] > 0 and
+ from['domain'] and #from['domain'] > 0
then
r.from_user = from['user']
r.from_domain = from['domain']:lower()
@@ -577,8 +578,8 @@ local function get_general_metadata(task)
if task:has_from('mime') then
local mime_from = task:get_from({ 'mime', 'orig' })[1]
if mime_from and
- mime_from['user'] and #mime_from['user'] > 0 and
- mime_from['domain'] and #mime_from['domain'] > 0
+ mime_from['user'] and #mime_from['user'] > 0 and
+ mime_from['domain'] and #mime_from['domain'] > 0
then
r.mime_from_user = mime_from['user']
r.mime_from_domain = mime_from['domain']:lower()
@@ -612,14 +613,14 @@ local function get_general_metadata(task)
local l = {}
for _, h in ipairs(hdr) do
if settings['index_template']['headers_count_ignore_above'] ~= 0 and
- #l >= settings['index_template']['headers_count_ignore_above']
+ #l >= settings['index_template']['headers_count_ignore_above']
then
table.insert(l, 'ignored above...')
break
end
local header
if settings['index_template']['headers_text_ignore_above'] ~= 0 and
- h.decoded and #h.decoded >= headers_text_ignore_above
+ h.decoded and #h.decoded >= headers_text_ignore_above
then
header = h.decoded:sub(1, headers_text_ignore_above) .. '...'
elseif h.decoded and #h.decoded > 0 then
@@ -663,10 +664,10 @@ local function get_general_metadata(task)
local lang_t = {}
if parts then
for _, part in ipairs(parts) do
- local l = part:get_language()
- if l and not contains(lang_t, l) then
- table.insert(lang_t, l)
- end
+ local l = part:get_language()
+ if l and not contains(lang_t, l) then
+ table.insert(lang_t, l)
+ end
end
if #lang_t > 0 then
r.language = lang_t
@@ -701,15 +702,15 @@ local function elastic_collect(task)
if buffer['logs']:length() >= settings['limits']['max_rows'] then
buffer['logs']:pop_first(settings['limits']['max_rows'])
rspamd_logger.errx(task,
- 'elastic distro not supported, deleting %s logs from buffer due to reaching max rows limit',
- settings['limits']['max_rows'])
+ 'elastic distro not supported, deleting %s logs from buffer due to reaching max rows limit',
+ settings['limits']['max_rows'])
end
end
local now = tostring(rspamd_util.get_time() * 1000)
local row = { ['rspamd_meta'] = get_general_metadata(task), ['@timestamp'] = now }
buffer['logs']:push(row)
- rspamd_logger.debugm(N, task, 'saved log to buffer')
+ lua_util.debugm(N, task, 'saved log to buffer')
end
local function periodic_send_data(cfg, ev_base)
@@ -718,7 +719,8 @@ local function periodic_send_data(cfg, ev_base)
local nlogs_total = buffer['logs']:length()
if nlogs_total >= settings['limits']['max_rows'] then
- rspamd_logger.infox(rspamd_config, 'flushing buffer by reaching max rows: %s/%s', nlogs_total, settings['limits']['max_rows'])
+ rspamd_logger.infox(rspamd_config, 'flushing buffer by reaching max rows: %s/%s', nlogs_total,
+ settings['limits']['max_rows'])
flush_needed = true
else
local first_row = buffer['logs']:get(1)
@@ -726,8 +728,9 @@ local function periodic_send_data(cfg, ev_base)
local time_diff = now - first_row['@timestamp']
local time_diff_sec = lua_util.round((time_diff / 1000), 1)
if time_diff_sec > settings.limits.max_interval then
- rspamd_logger.infox(rspamd_config, 'flushing buffer for %s by reaching max interval, oldest log in buffer written %s sec ago',
- time_diff_sec, first_row['@timestamp'])
+ rspamd_logger.infox(rspamd_config,
+ 'flushing buffer for %s by reaching max interval, oldest log in buffer written %s sec ago',
+ time_diff_sec, first_row['@timestamp'])
flush_needed = true
end
end
@@ -770,8 +773,8 @@ local function configure_geoip_pipeline(cfg, ev_base)
upstream:ok()
else
rspamd_logger.errx(rspamd_config,
- 'cannot configure elastic geoip pipeline (%s), status code: %s, response: %s',
- geoip_url, code, body)
+ 'cannot configure elastic geoip pipeline (%s), status code: %s, response: %s',
+ geoip_url, code, body)
upstream:fail()
handle_error('configure', 'geoip_pipeline', settings['limits']['max_fail'])
end
@@ -807,7 +810,8 @@ local function put_index_policy(cfg, ev_base, upstream, host, policy_url, index_
states['index_policy']['configured'] = true
upstream:ok()
else
- rspamd_logger.errx(rspamd_config, 'cannot configure elastic index policy (%s), status code: %s, response: %s', policy_url, code, body)
+ rspamd_logger.errx(rspamd_config, 'cannot configure elastic index policy (%s), status code: %s, response: %s',
+ policy_url, code, body)
upstream:fail()
handle_error('configure', 'index_policy', settings['limits']['max_fail'])
end
@@ -862,7 +866,8 @@ local function get_index_policy(cfg, ev_base, upstream, host, policy_url, index_
local current_states = safe_get(remote_policy, 'policy', 'states')
if not lua_util.table_cmp(our_policy['policy']['default_state'], current_default_state) then
update_needed = true
- elseif not lua_util.table_cmp(our_policy['policy']['ism_template'][1]['index_patterns'], current_ism_index_patterns) then
+ elseif not lua_util.table_cmp(our_policy['policy']['ism_template'][1]['index_patterns'],
+ current_ism_index_patterns) then
update_needed = true
elseif not lua_util.table_cmp(our_policy['policy']['states'], current_states) then
update_needed = true
@@ -885,8 +890,8 @@ local function get_index_policy(cfg, ev_base, upstream, host, policy_url, index_
put_index_policy(cfg, ev_base, upstream, host, policy_url, index_policy_json)
else
rspamd_logger.errx(rspamd_config,
- 'current elastic index policy (%s) not returned correct seq_no/primary_term, policy will not be updated, response: %s',
- policy_url, body)
+ 'current elastic index policy (%s) not returned correct seq_no/primary_term, policy will not be updated, response: %s',
+ policy_url, body)
upstream:fail()
handle_error('validate current', 'index_policy', settings['limits']['max_fail'])
end
@@ -904,8 +909,8 @@ local function get_index_policy(cfg, ev_base, upstream, host, policy_url, index_
end
else
rspamd_logger.errx(rspamd_config,
- 'cannot get current elastic index policy (%s), status code: %s, response: %s',
- policy_url, code, body)
+ 'cannot get current elastic index policy (%s), status code: %s, response: %s',
+ policy_url, code, body)
handle_error('get current', 'index_policy', settings['limits']['max_fail'])
upstream:fail()
end
@@ -1032,7 +1037,7 @@ local function configure_index_policy(cfg, ev_base)
}
index_policy['policy']['phases']['delete'] = delete_obj
end
- -- opensearch state policy with hot state
+ -- opensearch state policy with hot state
elseif detected_distro['name'] == 'opensearch' then
local retry = {
count = 3,
@@ -1376,7 +1381,7 @@ local function configure_index_template(cfg, ev_base)
upstream:ok()
else
rspamd_logger.errx(rspamd_config, 'cannot configure elastic index template (%s), status code: %s, response: %s',
- template_url, code, body)
+ template_url, code, body)
upstream:fail()
handle_error('configure', 'index_template', settings['limits']['max_fail'])
end
@@ -1419,7 +1424,8 @@ local function verify_distro(manual)
local supported_distro_info = supported_distro[detected_distro_name]
-- check that detected_distro_version is valid
if not detected_distro_version or type(detected_distro_version) ~= 'string' then
- rspamd_logger.errx(rspamd_config, 'elastic version should be a string, but we received: %s', type(detected_distro_version))
+ rspamd_logger.errx(rspamd_config, 'elastic version should be a string, but we received: %s',
+ type(detected_distro_version))
valid = false
elseif detected_distro_version == '' then
rspamd_logger.errx(rspamd_config, 'unsupported elastic version: empty string')
@@ -1429,18 +1435,20 @@ local function verify_distro(manual)
local cmp_from = compare_versions(detected_distro_version, supported_distro_info['from'])
if cmp_from == -1 then
rspamd_logger.errx(rspamd_config, 'unsupported elastic version: %s, minimal supported version of %s is %s',
- detected_distro_version, detected_distro_name, supported_distro_info['from'])
+ detected_distro_version, detected_distro_name, supported_distro_info['from'])
valid = false
else
local cmp_till = compare_versions(detected_distro_version, supported_distro_info['till'])
if (cmp_till >= 0) and not supported_distro_info['till_unknown'] then
- rspamd_logger.errx(rspamd_config, 'unsupported elastic version: %s, maximum supported version of %s is less than %s',
- detected_distro_version, detected_distro_name, supported_distro_info['till'])
+ rspamd_logger.errx(rspamd_config,
+ 'unsupported elastic version: %s, maximum supported version of %s is less than %s',
+ detected_distro_version, detected_distro_name, supported_distro_info['till'])
valid = false
elseif (cmp_till >= 0) and supported_distro_info['till_unknown'] then
rspamd_logger.warnx(rspamd_config,
- 'compatibility of elastic version: %s is unknown, maximum known supported version of %s is less than %s, use at your own risk',
- detected_distro_version, detected_distro_name, supported_distro_info['till'])
+ 'compatibility of elastic version: %s is unknown, maximum known supported version of %s is less than %s,' ..
+ 'use at your own risk',
+ detected_distro_version, detected_distro_name, supported_distro_info['till'])
valid_unknown = true
end
end
@@ -1452,14 +1460,14 @@ local function verify_distro(manual)
else
if valid and manual then
rspamd_logger.infox(
- rspamd_config, 'assuming elastic distro: %s, version: %s', detected_distro_name, detected_distro_version)
+ rspamd_config, 'assuming elastic distro: %s, version: %s', detected_distro_name, detected_distro_version)
detected_distro['supported'] = true
elseif valid and not manual then
rspamd_logger.infox(rspamd_config, 'successfully connected to elastic distro: %s, version: %s',
- detected_distro_name, detected_distro_version)
+ detected_distro_name, detected_distro_version)
detected_distro['supported'] = true
else
- handle_error('configure','distro',settings['version']['autodetect_max_fail'])
+ handle_error('configure', 'distro', settings['version']['autodetect_max_fail'])
end
end
end
@@ -1468,7 +1476,8 @@ local function configure_distro(cfg, ev_base)
if not settings['version']['autodetect_enabled'] then
detected_distro['name'] = settings['version']['override']['name']
detected_distro['version'] = settings['version']['override']['version']
- rspamd_logger.infox(rspamd_config, 'automatic detection of elastic distro and version is disabled, taking configuration from settings')
+ rspamd_logger.infox(rspamd_config,
+ 'automatic detection of elastic distro and version is disabled, taking configuration from settings')
verify_distro(true)
end
@@ -1481,7 +1490,8 @@ local function configure_distro(cfg, ev_base)
rspamd_logger.errx(rspamd_config, 'cannot connect to elastic (%s): %s', root_url, err)
upstream:fail()
elseif code ~= 200 then
- rspamd_logger.errx(rspamd_config, 'cannot connect to elastic (%s), status code: %s, response: %s', root_url, code, body)
+ rspamd_logger.errx(rspamd_config, 'cannot connect to elastic (%s), status code: %s, response: %s', root_url, code,
+ body)
upstream:fail()
else
local parser = ucl.parser()
@@ -1492,10 +1502,10 @@ local function configure_distro(cfg, ev_base)
else
local obj = parser:get_object()
if obj['tagline'] == "The OpenSearch Project: https://opensearch.org/" then
- detected_distro['name'] = 'opensearch'
+ detected_distro['name'] = 'opensearch'
end
if obj['tagline'] == "You Know, for Search" then
- detected_distro['name'] = 'elastic'
+ detected_distro['name'] = 'elastic'
end
if obj['version'] then
if obj['version']['number'] then
@@ -1537,7 +1547,7 @@ end
local opts = rspamd_config:get_all_opt('elastic')
if opts then
- for k,v in pairs(opts) do
+ for k, v in pairs(opts) do
settings[k] = v
end
@@ -1574,7 +1584,7 @@ if opts then
rspamd_config:register_finish_script(function(task)
local nlogs_total = buffer['logs']:length()
if nlogs_total > 0 then
- rspamd_logger.debugm(N, task, 'flushing buffer on shutdown, buffer size: %s', nlogs_total)
+ lua_util.debugm(N, task, 'flushing buffer on shutdown, buffer size: %s', nlogs_total)
elastic_send_data(true, task)
end
end)
diff --git a/src/plugins/lua/external_services.lua b/src/plugins/lua/external_services.lua
index e299d9faa..307218d94 100644
--- a/src/plugins/lua/external_services.lua
+++ b/src/plugins/lua/external_services.lua
@@ -274,7 +274,7 @@ if opts and type(opts) == 'table' then
for _, p in ipairs(m['patterns']) do
if type(p) == 'table' then
for sym in pairs(p) do
- rspamd_logger.debugm(N, rspamd_config, 'registering: %1', {
+ lua_util.debugm(N, rspamd_config, 'registering: %1', {
type = 'virtual',
name = sym,
parent = m['symbol'],
@@ -307,7 +307,7 @@ if opts and type(opts) == 'table' then
for _, p in ipairs(m['patterns_fail']) do
if type(p) == 'table' then
for sym in pairs(p) do
- rspamd_logger.debugm(N, rspamd_config, 'registering: %1', {
+ lua_util.debugm(N, rspamd_config, 'registering: %1', {
type = 'virtual',
name = sym,
parent = m['symbol'],
diff --git a/src/plugins/lua/neural.lua b/src/plugins/lua/neural.lua
index f3b26f11a..ea40fc4f7 100644
--- a/src/plugins/lua/neural.lua
+++ b/src/plugins/lua/neural.lua
@@ -687,31 +687,31 @@ local function maybe_train_existing_ann(worker, ev_base, rule, set, profiles)
return l >= rule.train.max_trains * (1.0 - rule.train.classes_bias)
end
if max_len >= rule.train.max_trains and fun.all(len_bias_check_pred, lens) then
- rspamd_logger.debugm(N, rspamd_config,
+ lua_util.debugm(N, rspamd_config,
'can start ANN %s learn as it has %s learn vectors; %s required, after checking %s vectors',
ann_key, lens, rule.train.max_trains, what)
cont_cb()
else
- rspamd_logger.debugm(N, rspamd_config,
+ lua_util.debugm(N, rspamd_config,
'cannot learn ANN %s now: there are not enough %s learn vectors (has %s vectors; %s required)',
ann_key, what, lens, rule.train.max_trains)
end
else
-- Probabilistic mode, just ensure that at least one vector is okay
if min_len > 0 and max_len >= rule.train.max_trains then
- rspamd_logger.debugm(N, rspamd_config,
+ lua_util.debugm(N, rspamd_config,
'can start ANN %s learn as it has %s learn vectors; %s required, after checking %s vectors',
ann_key, lens, rule.train.max_trains, what)
cont_cb()
else
- rspamd_logger.debugm(N, rspamd_config,
+ lua_util.debugm(N, rspamd_config,
'cannot learn ANN %s now: there are not enough %s learn vectors (has %s vectors; %s required)',
ann_key, what, lens, rule.train.max_trains)
end
end
else
- rspamd_logger.debugm(N, rspamd_config,
+ lua_util.debugm(N, rspamd_config,
'checked %s vectors in ANN %s: %s vectors; %s required, need to check other class vectors',
what, ann_key, ntrains, rule.train.max_trains)
cont_cb()
diff --git a/src/plugins/lua/p0f.lua b/src/plugins/lua/p0f.lua
index 97757c23a..727e6d1bb 100644
--- a/src/plugins/lua/p0f.lua
+++ b/src/plugins/lua/p0f.lua
@@ -105,7 +105,7 @@ if rule then
end
for sym in pairs(rule.patterns) do
- rspamd_logger.debugm(N, rspamd_config, 'registering: %1', {
+ lua_util.debugm(N, rspamd_config, 'registering: %1', {
type = 'virtual',
name = sym,
parent = id,
diff --git a/src/plugins/lua/phishing.lua b/src/plugins/lua/phishing.lua
index 05e08c0f4..3f5c9e634 100644
--- a/src/plugins/lua/phishing.lua
+++ b/src/plugins/lua/phishing.lua
@@ -71,7 +71,7 @@ end
local function phishing_cb(task)
local function check_phishing_map(table)
local phishing_data = {}
- for k,v in pairs(table) do
+ for k, v in pairs(table) do
phishing_data[k] = v
end
local url = phishing_data.url
@@ -89,6 +89,8 @@ local function phishing_cb(task)
local data = nil
if elt then
+ lua_util.debugm(N, task, 'found host element: %s',
+ host)
local path = url:get_path()
local query = url:get_query()
@@ -156,7 +158,7 @@ local function phishing_cb(task)
local function check_phishing_dns(table)
local phishing_data = {}
- for k,v in pairs(table) do
+ for k, v in pairs(table) do
phishing_data[k] = v
end
local url = phishing_data.url
@@ -192,7 +194,7 @@ local function phishing_cb(task)
end
local to_resolve_hp = compose_dns_query({ host, path })
- rspamd_logger.debugm(N, task, 'try to resolve {%s, %s} -> %s',
+ lua_util.debugm(N, task, 'try to resolve {%s, %s} -> %s',
host, path, to_resolve_hp)
r:resolve_txt({
task = task,
@@ -207,7 +209,7 @@ local function phishing_cb(task)
end
local to_resolve_hpq = compose_dns_query({ host, path, query })
- rspamd_logger.debugm(N, task, 'try to resolve {%s, %s, %s} -> %s',
+ lua_util.debugm(N, task, 'try to resolve {%s, %s, %s} -> %s',
host, path, query, to_resolve_hpq)
r:resolve_txt({
task = task,
@@ -256,9 +258,14 @@ local function phishing_cb(task)
end
if url:is_phished() then
+ local surl = tostring(url)
local purl
+ lua_util.debugm(N, task, 'found phished url: %s',
+ surl)
if url:is_redirected() then
+ lua_util.debugm(N, task, 'url %s is also been redirected',
+ surl)
local rspamd_url = require "rspamd_url"
-- Examine the real redirect target instead of the url
local redirected_url = url:get_redirected()
@@ -268,6 +275,7 @@ local function phishing_cb(task)
purl = rspamd_url.create(task:get_mempool(), url:get_visible())
url = redirected_url
+ surl = string.format("redirected(%s)", tostring(url))
else
purl = url:get_phished()
end
@@ -276,6 +284,10 @@ local function phishing_cb(task)
return
end
+ local spurl = tostring(purl)
+ lua_util.debugm(N, task, 'processing pair %s -> %s',
+ surl, spurl)
+
local tld = url:get_tld()
local ptld = purl:get_tld()
@@ -308,10 +320,11 @@ local function phishing_cb(task)
local weight = 1.0
local spoofed, why = util.is_utf_spoofed(tld, ptld)
if spoofed then
- lua_util.debugm(N, task, "confusable: %1 -> %2: %3", tld, ptld, why)
+ lua_util.debugm(N, task, "confusable: %s -> %s: %s", tld, ptld, why)
weight = 1.0
else
- local dist = util.levenshtein_distance(stripped_tld, stripped_ptld, 2)
+ local dist = (stripped_tld == stripped_ptld) and 0
+ or util.levenshtein_distance(stripped_tld, stripped_ptld, 2)
dist = 2 * dist / (#stripped_tld + #stripped_ptld)
if dist > 0.3 and dist <= 1.0 then
@@ -330,15 +343,18 @@ local function phishing_cb(task)
if a1 ~= a2 then
weight = 1
- lua_util.debugm(N, task, "confusable: %1 -> %2: different characters",
- tld, ptld, why)
+ lua_util.debugm(N, task, "confusable: %s -> %s: different characters",
+ tld, ptld)
else
-- We have totally different strings in tld, so penalize it somehow
weight = 0.5
end
+ elseif dist == 0 then
+ -- Same domains, not phishing!
+ weight = 0.0
end
- lua_util.debugm(N, task, "distance: %1 -> %2: %3", tld, ptld, dist)
+ lua_util.debugm(N, task, "distance: %s -> %s: %s; weight = %s", tld, ptld, dist, weight)
end
local function is_url_in_map(map, furl)
@@ -368,15 +384,17 @@ local function phishing_cb(task)
end
end
- found_in_map(strict_domains_maps, purl, 1.0)
- if not found_in_map(anchor_exceptions_maps) then
- if not found_in_map(phishing_exceptions_maps, purl, 1.0) then
- if domains then
- if is_url_in_map(domains, purl) then
+ if weight > 0 then
+ found_in_map(strict_domains_maps, purl, 1.0)
+ if not found_in_map(anchor_exceptions_maps) then
+ if not found_in_map(phishing_exceptions_maps, purl, 1.0) then
+ if domains then
+ if is_url_in_map(domains, purl) then
+ task:insert_result(symbol, weight, ptld .. '->' .. tld)
+ end
+ else
task:insert_result(symbol, weight, ptld .. '->' .. tld)
end
- else
- task:insert_result(symbol, weight, ptld .. '->' .. tld)
end
end
end
diff --git a/src/plugins/lua/reputation.lua b/src/plugins/lua/reputation.lua
index a3af26c91..bd7d91932 100644
--- a/src/plugins/lua/reputation.lua
+++ b/src/plugins/lua/reputation.lua
@@ -955,7 +955,7 @@ local function reputation_redis_init(rule, cfg, ev_base, worker)
local get_script = lua_util.jinja_template(redis_get_script_tpl,
{ windows = rule.backend.config.buckets })
- rspamd_logger.debugm(N, rspamd_config, 'added extraction script %s', get_script)
+ lua_util.debugm(N, rspamd_config, 'added extraction script %s', get_script)
rule.backend.script_get = lua_redis.add_redis_script(get_script, our_redis_params)
-- Redis script to update Redis buckets
@@ -1003,7 +1003,7 @@ local function reputation_redis_init(rule, cfg, ev_base, worker)
local set_script = lua_util.jinja_template(redis_adaptive_emea_script_tpl,
{ windows = rule.backend.config.buckets })
- rspamd_logger.debugm(N, rspamd_config, 'added emea update script %s', set_script)
+ lua_util.debugm(N, rspamd_config, 'added emea update script %s', set_script)
rule.backend.script_set = lua_redis.add_redis_script(set_script, our_redis_params)
return true
diff --git a/src/plugins/lua/url_redirector.lua b/src/plugins/lua/url_redirector.lua
index 10b5fb255..c1fa85cae 100644
--- a/src/plugins/lua/url_redirector.lua
+++ b/src/plugins/lua/url_redirector.lua
@@ -182,7 +182,7 @@ local function resolve_cached(task, orig_url, url, key, ntries)
local function resolve_url()
if ntries > settings.nested_limit then
-- We cannot resolve more, stop
- rspamd_logger.debugm(N, task, 'cannot get more requests to resolve %s, stop on %s after %s attempts',
+ lua_util.debugm(N, task, 'cannot get more requests to resolve %s, stop on %s after %s attempts',
orig_url, url, ntries)
cache_url(task, orig_url, url, key, 'nested')
local str_orig_url = tostring(orig_url)
@@ -223,7 +223,7 @@ local function resolve_cached(task, orig_url, url, key, ntries)
if loc then
redir_url = rspamd_url.create(task:get_mempool(), loc)
end
- rspamd_logger.debugm(N, task, 'found redirect from %s to %s, err code %s',
+ lua_util.debugm(N, task, 'found redirect from %s to %s, err code %s',
orig_url, loc, code)
if redir_url then
@@ -239,11 +239,11 @@ local function resolve_cached(task, orig_url, url, key, ntries)
resolve_cached(task, orig_url, redir_url, key, ntries + 1)
end
else
- rspamd_logger.debugm(N, task, "no location, headers: %s", headers)
+ lua_util.debugm(N, task, "no location, headers: %s", headers)
cache_url(task, orig_url, url, key)
end
else
- rspamd_logger.debugm(N, task, 'found redirect error from %s to %s, err code: %s',
+ lua_util.debugm(N, task, 'found redirect error from %s to %s, err code: %s',
orig_url, url, code)
cache_url(task, orig_url, url, key)
end
@@ -278,7 +278,7 @@ local function resolve_cached(task, orig_url, url, key, ntries)
if type(data) == 'string' then
if data ~= 'processing' then
-- Got cached result
- rspamd_logger.debugm(N, task, 'found cached redirect from %s to %s',
+ lua_util.debugm(N, task, 'found cached redirect from %s to %s',
url, data)
if data:sub(1, 1) == '^' then
-- Prefixed url stored