author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-12-12 17:54:01 +0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-12-12 17:54:01 +0600 |
commit | 1e63b18528f33f89757709e1851fa3499e512e96 (patch) | |
tree | e45d5b884ed23f7fb303743171edc8ef99ca41ff | |
parent | 7e6bec3c411cf368f37c1701141a5f65895b741a (diff) | |
parent | 3197ce7e6ad44cd4afc0f0013dfc03f8a4deeec8 (diff) | |
download | rspamd-1e63b18528f33f89757709e1851fa3499e512e96.tar.gz rspamd-1e63b18528f33f89757709e1851fa3499e512e96.zip |
Merge pull request #5254 from rspamd/vstakhov-phishing-fixes
Fix phishing symbol for the same domains
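The substance of this merge is the weighting change in the `phishing.lua` hunk below: when the phished and target domains are the same, the Levenshtein check is skipped and the symbol weight is forced to zero. A minimal stand-alone Lua sketch of that logic (the `levenshtein` helper and the collapsed 0.5 branch are illustrative stand-ins for `util.levenshtein_distance` and the per-character checks in the real plugin):

```lua
-- Naive Levenshtein distance, a stand-in for rspamd's util.levenshtein_distance().
local function levenshtein(a, b)
  local la, lb = #a, #b
  local d = {}
  for i = 0, la do
    d[i] = { [0] = i }
  end
  for j = 0, lb do
    d[0][j] = j
  end
  for i = 1, la do
    for j = 1, lb do
      local cost = (a:sub(i, i) == b:sub(j, j)) and 0 or 1
      d[i][j] = math.min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost)
    end
  end
  return d[la][lb]
end

-- Simplified version of the weighting done in phishing_cb() after this merge.
local function phishing_weight(stripped_tld, stripped_ptld)
  local weight = 1.0
  -- New behaviour: identical domains never go through the distance heuristics
  local dist = (stripped_tld == stripped_ptld) and 0
      or levenshtein(stripped_tld, stripped_ptld)
  dist = 2 * dist / (#stripped_tld + #stripped_ptld)

  if dist > 0.3 and dist <= 1.0 then
    weight = 0.5 -- the real plugin also compares leading characters here
  elseif dist == 0 then
    weight = 0.0 -- same domains, not phishing
  end
  return weight
end

print(phishing_weight('example.com', 'example.com'))  -- 0.0, no symbol inserted
print(phishing_weight('example.com', 'exarnple.com')) -- 1.0, lookalike keeps full weight
```

Before this change there was no `dist == 0` branch, so an identical pair kept the initial weight of 1.0 and the phishing symbol could fire for the same domain; the new `if weight > 0 then` guard around `task:insert_result()` completes the fix.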
-rw-r--r-- | src/lua/lua_util.c | 13
-rw-r--r-- | src/plugins/lua/antivirus.lua | 4
-rw-r--r-- | src/plugins/lua/arc.lua | 6
-rw-r--r-- | src/plugins/lua/clickhouse.lua | 4
-rw-r--r-- | src/plugins/lua/clustering.lua | 10
-rw-r--r-- | src/plugins/lua/elastic.lua | 172
-rw-r--r-- | src/plugins/lua/external_services.lua | 4
-rw-r--r-- | src/plugins/lua/neural.lua | 10
-rw-r--r-- | src/plugins/lua/p0f.lua | 2
-rw-r--r-- | src/plugins/lua/phishing.lua | 50
-rw-r--r-- | src/plugins/lua/reputation.lua | 4
-rw-r--r-- | src/plugins/lua/url_redirector.lua | 10
12 files changed, 164 insertions, 125 deletions
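For the `lua_util.c` hunk at the top of the diff: the ICU spoof checker behind `util.is_utf_spoofed()` now masks out `USPOOF_SINGLE_SCRIPT_CONFUSABLE`, so the two-argument form no longer reports a pair of ordinary same-script strings (including two identical domains) as confusable. A hedged usage sketch, assuming an rspamd runtime (`rspamd_util` is rspamd's own Lua binding, and the expected result is an assumption based on this patch, not a documented guarantee):

```lua
local util = require "rspamd_util"

-- Two-argument form: this is the code path patched in lua_util.c below.
-- With single-script confusables disabled, an identical same-script pair
-- should no longer come back as spoofed.
local spoofed, why = util.is_utf_spoofed('example.com', 'example.com')
print(spoofed, why) -- expected after this change: false, nil
```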
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 14994751c..ce4d9f67c 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -2152,6 +2152,17 @@ lua_util_is_utf_spoofed(lua_State *L)
 			return 1;
 		}
+
+		/* Disable single script confusables, as it is not what we want to check */
+		uspoof_setChecks(spc,
+				 USPOOF_CONFUSABLE & ~USPOOF_SINGLE_SCRIPT_CONFUSABLE,
+				 &uc_err);
+		if (uc_err != U_ZERO_ERROR) {
+			msg_err("Cannot set proper checks for uspoof: %s", u_errorName(uc_err));
+			lua_pushboolean(L, false);
+			uspoof_close(spc);
+			return 1;
+		}
 	}
 
 	ret = uspoof_areConfusableUTF8(spc, s1, l1, s2, l2, &uc_err);
@@ -2174,7 +2185,7 @@ lua_util_is_utf_spoofed(lua_State *L)
 		if (uc_err != U_ZERO_ERROR) {
 			msg_err("Cannot set proper checks for uspoof: %s", u_errorName(uc_err));
 			lua_pushboolean(L, false);
-			uspoof_close(spc);
+			uspoof_close(spc_sgl);
 			return 1;
 		}
 	}
diff --git a/src/plugins/lua/antivirus.lua b/src/plugins/lua/antivirus.lua
index e39ddc5ba..5337f6666 100644
--- a/src/plugins/lua/antivirus.lua
+++ b/src/plugins/lua/antivirus.lua
@@ -258,7 +258,7 @@ if opts and type(opts) == 'table' then
         for _, p in ipairs(m['patterns']) do
           if type(p) == 'table' then
             for sym in pairs(p) do
-              rspamd_logger.debugm(N, rspamd_config, 'registering: %1', {
+              lua_util.debugm(N, rspamd_config, 'registering: %1', {
                 type = 'virtual',
                 name = sym,
                 parent = m['symbol'],
@@ -292,7 +292,7 @@ if opts and type(opts) == 'table' then
         for _, p in ipairs(m['patterns_fail']) do
           if type(p) == 'table' then
             for sym in pairs(p) do
-              rspamd_logger.debugm(N, rspamd_config, 'registering: %1', {
+              lua_util.debugm(N, rspamd_config, 'registering: %1', {
                 type = 'virtual',
                 name = sym,
                 parent = m['symbol'],
diff --git a/src/plugins/lua/arc.lua b/src/plugins/lua/arc.lua
index 90e254e78..fb5dd93e6 100644
--- a/src/plugins/lua/arc.lua
+++ b/src/plugins/lua/arc.lua
@@ -517,10 +517,10 @@ local function arc_sign_seal(task, params, header)
     local ar_header = task:get_header('Authentication-Results')
 
     if ar_header then
-      rspamd_logger.debugm(N, task, 'reuse authentication results header for ARC')
+      lua_util.debugm(N, task, 'reuse authentication results header for ARC')
       cur_auth_results = ar_header
     else
-      rspamd_logger.debugm(N, task, 'cannot reuse authentication results, header is missing')
+      lua_util.debugm(N, task, 'cannot reuse authentication results, header is missing')
       cur_auth_results = lua_auth_results.gen_auth_results(task, ar_settings) or ''
     end
   else
@@ -639,7 +639,7 @@ local function prepare_arc_selector(task, sel)
       ar_header = ar_header or ""
       for k, v in string.gmatch(ar_header, "(%w+)=(%w+)") do
         if k == 'arc' then
-            return v
+          return v
         end
       end
       return nil
diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua
index 25eabc760..16a8ad4ec 100644
--- a/src/plugins/lua/clickhouse.lua
+++ b/src/plugins/lua/clickhouse.lua
@@ -1146,7 +1146,7 @@ local function upload_clickhouse_schema(upstream, ev_base, cfg, initial)
       errored = true
       return
     end
-    rspamd_logger.debugm(N, rspamd_config, 'uploaded clickhouse schema element %s to %s: %s',
+    lua_util.debugm(N, rspamd_config, 'uploaded clickhouse schema element %s to %s: %s',
       v, upstream:get_addr():to_string(true), reply)
   end
 
@@ -1159,7 +1159,7 @@ local function upload_clickhouse_schema(upstream, ev_base, cfg, initial)
     if initial == v[2] then
       return lua_util.template(v[1], { SCHEMA_VERSION = tostring(schema_version) })
     else
-      rspamd_logger.debugm(N, rspamd_config, 'skip clickhouse schema element %s: schema already exists',
+      lua_util.debugm(N, rspamd_config, 'skip clickhouse schema element %s: schema already exists',
        v)
     end
   end
diff --git a/src/plugins/lua/clustering.lua b/src/plugins/lua/clustering.lua
index d97bdb97e..a00ea9f43 100644
--- a/src/plugins/lua/clustering.lua
+++ b/src/plugins/lua/clustering.lua
@@ -135,7 +135,7 @@ local function clusterting_filter_cb(task, rule)
   end
 
   if not cluster_selector or not source_selector then
-    rspamd_logger.debugm(N, task, 'skip rule %s, selectors: source="%s", cluster="%s"',
+    lua_util.debugm(N, task, 'skip rule %s, selectors: source="%s", cluster="%s"',
       rule.name, source_selector, cluster_selector)
     return
   end
@@ -153,7 +153,7 @@ local function clusterting_filter_cb(task, rule)
     -- We have seen this element in ham mostly, so subtract average it from the size score
     final_score = math.min(1.0, size_score - cluster_score / cur_elts)
   end
-  rspamd_logger.debugm(N, task,
+  lua_util.debugm(N, task,
     'processed rule %s, selectors: source="%s", cluster="%s"; data: %s elts, %s score, %s elt score',
     rule.name, source_selector, cluster_selector, cur_elts, total_score, element_score)
   if final_score > 0.1 then
@@ -205,7 +205,7 @@ local function clusterting_idempotent_cb(task, rule)
   elseif verdict == 'junk' then
     score = rule.junk_mult
   else
-    rspamd_logger.debugm(N, task, 'skip rule %s, verdict=%s',
+    lua_util.debugm(N, task, 'skip rule %s, verdict=%s',
       rule.name, verdict)
     return
   end
@@ -218,7 +218,7 @@ local function clusterting_idempotent_cb(task, rule)
   end
 
   if not cluster_selector or not source_selector then
-    rspamd_logger.debugm(N, task, 'skip rule %s, selectors: source="%s", cluster="%s"',
+    lua_util.debugm(N, task, 'skip rule %s, selectors: source="%s", cluster="%s"',
       rule.name, source_selector, cluster_selector)
     return
   end
@@ -228,7 +228,7 @@ local function clusterting_idempotent_cb(task, rule)
     rspamd_logger.errx(task, 'got error while getting clustering keys %s: %s',
       source_selector, err)
   else
-    rspamd_logger.debugm(N, task, 'set clustering key for %s: %s{%s} = %s',
+    lua_util.debugm(N, task, 'set clustering key for %s: %s{%s} = %s',
       source_selector, "unknown error")
   end
 end
diff --git a/src/plugins/lua/elastic.lua b/src/plugins/lua/elastic.lua
index f3eb3cc4f..8bed9fcf4 100644
--- a/src/plugins/lua/elastic.lua
+++ b/src/plugins/lua/elastic.lua
@@ -72,7 +72,7 @@ local settings = {
   enabled = true,
   version = {
     autodetect_enabled = true,
-      autodetect_max_fail = 30,
+    autodetect_max_fail = 30,
     -- override works only if autodetect is disabled
     override = {
       name = 'opensearch',
@@ -164,7 +164,7 @@ local Queue = {}
 Queue.__index = Queue
 
 function Queue:new()
-  local obj = {first = 1, last = 0, data = {}}
+  local obj = { first = 1, last = 0, data = {} }
   setmetatable(obj, self)
   return obj
 end
@@ -234,7 +234,7 @@ local buffer = {
 }
 
 local function contains(tbl, val)
-  for i=1,#tbl do
+  for i = 1, #tbl do
     if tbl[i]:lower() == val:lower() then
       return true
     end
@@ -244,7 +244,7 @@ end
 
 local function safe_get(table, ...)
   local value = table
-  for _, key in ipairs({...}) do
+  for _, key in ipairs({ ... }) do
     if value[key] == nil then
       return nil
     end
@@ -284,10 +284,10 @@ local function compare_versions(v1, v2)
   return 0 -- versions are equal
 end
 
-local function handle_error(action,component,limit)
+local function handle_error(action, component, limit)
   if states[component]['errors'] >= limit then
     rspamd_logger.errx(rspamd_config, 'cannot %s elastic %s, failed attempts: %s/%s, stop trying',
-        action, component:gsub('_', ' '), states[component]['errors'], limit)
+      action, component:gsub('_', ' '), states[component]['errors'], limit)
     states[component]['configured'] = true
   else
     states[component]['errors'] = states[component]['errors'] + 1
@@ -318,25 +318,25 @@ end
 local function is_empty(str)
   -- define a pattern that includes invisible unicode characters
   local str_cleared = str:gsub('[' ..
-      '\xC2\xA0' .. -- U+00A0 non-breaking space
-      '\xE2\x80\x8B' .. -- U+200B zero width space
-      '\xEF\xBB\xBF' .. -- U+FEFF byte order mark (zero width no-break space)
-      '\xE2\x80\x8C' .. -- U+200C zero width non-joiner
-      '\xE2\x80\x8D' .. -- U+200D zero width joiner
-      '\xE2\x80\x8E' .. -- U+200E left-to-right mark
-      '\xE2\x80\x8F' .. -- U+200F right-to-left mark
-      '\xE2\x81\xA0' .. -- U+2060 word joiner
-      '\xE2\x80\xAA' .. -- U+202A left-to-right embedding
-      '\xE2\x80\xAB' .. -- U+202B right-to-left embedding
-      '\xE2\x80\xAC' .. -- U+202C pop directional formatting
-      '\xE2\x80\xAD' .. -- U+202D left-to-right override
-      '\xE2\x80\xAE' .. -- U+202E right-to-left override
-      '\xE2\x81\x9F' .. -- U+2061 function application
-      '\xE2\x81\xA1' .. -- U+2061 invisible separator
-      '\xE2\x81\xA2' .. -- U+2062 invisible times
-      '\xE2\x81\xA3' .. -- U+2063 invisible separator
-      '\xE2\x81\xA4' .. -- U+2064 invisible plus
-      ']', '') -- gsub replaces all matched characters with an empty string
+    '\xC2\xA0' ..     -- U+00A0 non-breaking space
+    '\xE2\x80\x8B' .. -- U+200B zero width space
+    '\xEF\xBB\xBF' .. -- U+FEFF byte order mark (zero width no-break space)
+    '\xE2\x80\x8C' .. -- U+200C zero width non-joiner
+    '\xE2\x80\x8D' .. -- U+200D zero width joiner
+    '\xE2\x80\x8E' .. -- U+200E left-to-right mark
+    '\xE2\x80\x8F' .. -- U+200F right-to-left mark
+    '\xE2\x81\xA0' .. -- U+2060 word joiner
+    '\xE2\x80\xAA' .. -- U+202A left-to-right embedding
+    '\xE2\x80\xAB' .. -- U+202B right-to-left embedding
+    '\xE2\x80\xAC' .. -- U+202C pop directional formatting
+    '\xE2\x80\xAD' .. -- U+202D left-to-right override
+    '\xE2\x80\xAE' .. -- U+202E right-to-left override
+    '\xE2\x81\x9F' .. -- U+2061 function application
+    '\xE2\x81\xA1' .. -- U+2061 invisible separator
+    '\xE2\x81\xA2' .. -- U+2062 invisible times
+    '\xE2\x81\xA3' .. -- U+2063 invisible separator
+    '\xE2\x81\xA4' .. -- U+2064 invisible plus
+    ']', '')          -- gsub replaces all matched characters with an empty string
   if str_cleared:match('[%S]') then
     return false
   else
@@ -350,7 +350,7 @@ local function fill_empty_strings(tbl, empty_value)
     if value and type(value) == 'table' then
       local nested_filtered = fill_empty_strings(value, empty_value)
       if next(nested_filtered) ~= nil then
-          filled_tbl[key] = nested_filtered
+        filled_tbl[key] = nested_filtered
       end
     elseif type(value) == 'boolean' then
       filled_tbl[key] = value
@@ -367,8 +367,8 @@ local function create_bulk_json(es_index, logs_to_send)
   local tbl = {}
   for _, row in pairs(logs_to_send) do
     local pipeline = ''
-    if settings['geoip']['enabled']then
-      pipeline = ',"pipeline":"'.. settings['geoip']['pipeline_name'] .. '"'
+    if settings['geoip']['enabled'] then
+      pipeline = ',"pipeline":"' .. settings['geoip']['pipeline_name'] .. '"'
     end
     table.insert(tbl, '{"index":{"_index":"' .. es_index .. '"' .. pipeline .. '}}')
     table.insert(tbl, ucl.to_format(row, 'json-compact'))
@@ -401,21 +401,21 @@ local function elastic_send_data(flush_all, task, cfg, ev_base)
     push_url = connect_prefix .. ip_addr .. '/' .. es_index .. '/_bulk'
     bulk_json = create_bulk_json(es_index, logs_to_send)
-    rspamd_logger.debugm(N, log_object, 'successfully composed payload with %s log lines', nlogs_to_send)
+    lua_util.debugm(N, log_object, 'successfully composed payload with %s log lines', nlogs_to_send)
   end
 
   local function http_callback(err, code, body, _)
     local push_done = false
     if err then
       rspamd_logger.errx(log_object, 'cannot send logs to elastic (%s): %s; failed attempts: %s/%s',
-          push_url, err, buffer['errors'], settings['limits']['max_fail'])
+        push_url, err, buffer['errors'], settings['limits']['max_fail'])
     elseif code == 200 then
       local parser = ucl.parser()
       local res, ucl_err = parser:parse_string(body)
       if not ucl_err and res then
         local obj = parser:get_object()
         push_done = true
-        rspamd_logger.debugm(N, log_object, 'successfully sent payload with %s logs', nlogs_to_send)
+        lua_util.debugm(N, log_object, 'successfully sent payload with %s logs', nlogs_to_send)
         if obj['errors'] then
           for _, value in pairs(obj['items']) do
             if value['index'] and value['index']['status'] >= 400 then
@@ -424,20 +424,20 @@ local function elastic_send_data(flush_all, task, cfg, ev_base)
               local error_type = safe_get(value, 'index', 'error', 'type') or ''
               local error_reason = safe_get(value, 'index', 'error', 'reason') or ''
               rspamd_logger.warnx(log_object,
-                  'error while pushing logs to elastic, status: %s, index: %s, type: %s, reason: %s',
-                  status, index, error_type, error_reason)
+                'error while pushing logs to elastic, status: %s, index: %s, type: %s, reason: %s',
+                status, index, error_type, error_reason)
             end
           end
         end
       else
         rspamd_logger.errx(log_object,
-            'cannot parse response from elastic (%s): %s; failed attempts: %s/%s',
-            push_url, ucl_err, buffer['errors'], settings['limits']['max_fail'])
+          'cannot parse response from elastic (%s): %s; failed attempts: %s/%s',
+          push_url, ucl_err, buffer['errors'], settings['limits']['max_fail'])
       end
     else
       rspamd_logger.errx(log_object,
-          'cannot send logs to elastic (%s) due to bad http status code: %s, response: %s; failed attempts: %s/%s',
-          push_url, code, body, buffer['errors'], settings['limits']['max_fail'])
+        'cannot send logs to elastic (%s) due to bad http status code: %s, response: %s; failed attempts: %s/%s',
+        push_url, code, body, buffer['errors'], settings['limits']['max_fail'])
     end
     -- proccess results
     if push_done then
@@ -447,8 +447,9 @@ local function elastic_send_data(flush_all, task, cfg, ev_base)
     else
       upstream:fail()
       if buffer['errors'] >= settings['limits']['max_fail'] then
-        rspamd_logger.errx(log_object, 'failed to send %s log lines, failed attempts: %s/%s, removing failed logs from bugger',
-            nlogs_to_send, buffer['errors'], settings['limits']['max_fail'])
+        rspamd_logger.errx(log_object,
+          'failed to send %s log lines, failed attempts: %s/%s, removing failed logs from bugger',
+          nlogs_to_send, buffer['errors'], settings['limits']['max_fail'])
         buffer['logs']:pop_first(nlogs_to_send)
         buffer['errors'] = 0
       else
@@ -466,7 +467,7 @@ local function elastic_send_data(flush_all, task, cfg, ev_base)
     },
     body = bulk_json,
     method = 'post',
-    callback=http_callback,
+    callback = http_callback,
     gzip = settings.use_gzip,
     keepalive = settings.use_keepalive,
     no_ssl_verify = settings.no_ssl_verify,
@@ -564,8 +565,8 @@ local function get_general_metadata(task)
   if task:has_from('smtp') then
     local from = task:get_from({ 'smtp', 'orig' })[1]
     if from and
-        from['user'] and #from['user'] > 0 and
-        from['domain'] and #from['domain'] > 0
+       from['user'] and #from['user'] > 0 and
+       from['domain'] and #from['domain'] > 0
     then
       r.from_user = from['user']
       r.from_domain = from['domain']:lower()
@@ -577,8 +578,8 @@ local function get_general_metadata(task)
   if task:has_from('mime') then
     local mime_from = task:get_from({ 'mime', 'orig' })[1]
     if mime_from and
-        mime_from['user'] and #mime_from['user'] > 0 and
-        mime_from['domain'] and #mime_from['domain'] > 0
+       mime_from['user'] and #mime_from['user'] > 0 and
+       mime_from['domain'] and #mime_from['domain'] > 0
     then
       r.mime_from_user = mime_from['user']
       r.mime_from_domain = mime_from['domain']:lower()
@@ -612,14 +613,14 @@ local function get_general_metadata(task)
     local l = {}
     for _, h in ipairs(hdr) do
       if settings['index_template']['headers_count_ignore_above'] ~= 0 and
-          #l >= settings['index_template']['headers_count_ignore_above']
+         #l >= settings['index_template']['headers_count_ignore_above']
       then
         table.insert(l, 'ignored above...')
         break
       end
       local header
       if settings['index_template']['headers_text_ignore_above'] ~= 0 and
-          h.decoded and #h.decoded >= headers_text_ignore_above
+         h.decoded and #h.decoded >= headers_text_ignore_above
       then
         header = h.decoded:sub(1, headers_text_ignore_above) .. '...'
       elseif h.decoded and #h.decoded > 0 then
@@ -663,10 +664,10 @@ local function get_general_metadata(task)
   local lang_t = {}
   if parts then
     for _, part in ipairs(parts) do
-        local l = part:get_language()
-        if l and not contains(lang_t, l) then
-          table.insert(lang_t, l)
-        end
+      local l = part:get_language()
+      if l and not contains(lang_t, l) then
+        table.insert(lang_t, l)
+      end
     end
     if #lang_t > 0 then
       r.language = lang_t
@@ -701,15 +702,15 @@ local function elastic_collect(task)
     if buffer['logs']:length() >= settings['limits']['max_rows'] then
       buffer['logs']:pop_first(settings['limits']['max_rows'])
       rspamd_logger.errx(task,
-          'elastic distro not supported, deleting %s logs from buffer due to reaching max rows limit',
-          settings['limits']['max_rows'])
+        'elastic distro not supported, deleting %s logs from buffer due to reaching max rows limit',
+        settings['limits']['max_rows'])
     end
   end
 
   local now = tostring(rspamd_util.get_time() * 1000)
   local row = { ['rspamd_meta'] = get_general_metadata(task), ['@timestamp'] = now }
   buffer['logs']:push(row)
-  rspamd_logger.debugm(N, task, 'saved log to buffer')
+  lua_util.debugm(N, task, 'saved log to buffer')
 end
 
 local function periodic_send_data(cfg, ev_base)
@@ -718,7 +719,8 @@ local function periodic_send_data(cfg, ev_base)
   local nlogs_total = buffer['logs']:length()
   if nlogs_total >= settings['limits']['max_rows'] then
-    rspamd_logger.infox(rspamd_config, 'flushing buffer by reaching max rows: %s/%s', nlogs_total, settings['limits']['max_rows'])
+    rspamd_logger.infox(rspamd_config, 'flushing buffer by reaching max rows: %s/%s', nlogs_total,
+      settings['limits']['max_rows'])
     flush_needed = true
   else
     local first_row = buffer['logs']:get(1)
@@ -726,8 +728,9 @@ local function periodic_send_data(cfg, ev_base)
     local time_diff = now - first_row['@timestamp']
     local time_diff_sec = lua_util.round((time_diff / 1000), 1)
     if time_diff_sec > settings.limits.max_interval then
-      rspamd_logger.infox(rspamd_config, 'flushing buffer for %s by reaching max interval, oldest log in buffer written %s sec ago',
-          time_diff_sec, first_row['@timestamp'])
+      rspamd_logger.infox(rspamd_config,
+        'flushing buffer for %s by reaching max interval, oldest log in buffer written %s sec ago',
+        time_diff_sec, first_row['@timestamp'])
       flush_needed = true
     end
   end
@@ -770,8 +773,8 @@ local function configure_geoip_pipeline(cfg, ev_base)
       upstream:ok()
     else
       rspamd_logger.errx(rspamd_config,
-          'cannot configure elastic geoip pipeline (%s), status code: %s, response: %s',
-          geoip_url, code, body)
+        'cannot configure elastic geoip pipeline (%s), status code: %s, response: %s',
+        geoip_url, code, body)
       upstream:fail()
       handle_error('configure', 'geoip_pipeline', settings['limits']['max_fail'])
     end
@@ -807,7 +810,8 @@ local function put_index_policy(cfg, ev_base, upstream, host, policy_url, index_
       states['index_policy']['configured'] = true
       upstream:ok()
     else
-      rspamd_logger.errx(rspamd_config, 'cannot configure elastic index policy (%s), status code: %s, response: %s', policy_url, code, body)
+      rspamd_logger.errx(rspamd_config, 'cannot configure elastic index policy (%s), status code: %s, response: %s',
+        policy_url, code, body)
       upstream:fail()
       handle_error('configure', 'index_policy', settings['limits']['max_fail'])
     end
@@ -862,7 +866,8 @@ local function get_index_policy(cfg, ev_base, upstream, host, policy_url, index_
         local current_states = safe_get(remote_policy, 'policy', 'states')
         if not lua_util.table_cmp(our_policy['policy']['default_state'], current_default_state) then
           update_needed = true
-        elseif not lua_util.table_cmp(our_policy['policy']['ism_template'][1]['index_patterns'], current_ism_index_patterns) then
+        elseif not lua_util.table_cmp(our_policy['policy']['ism_template'][1]['index_patterns'],
+            current_ism_index_patterns) then
           update_needed = true
         elseif not lua_util.table_cmp(our_policy['policy']['states'], current_states) then
           update_needed = true
@@ -885,8 +890,8 @@ local function get_index_policy(cfg, ev_base, upstream, host, policy_url, index_
             put_index_policy(cfg, ev_base, upstream, host, policy_url, index_policy_json)
           else
             rspamd_logger.errx(rspamd_config,
-                'current elastic index policy (%s) not returned correct seq_no/primary_term, policy will not be updated, response: %s',
-                policy_url, body)
+              'current elastic index policy (%s) not returned correct seq_no/primary_term, policy will not be updated, response: %s',
+              policy_url, body)
             upstream:fail()
             handle_error('validate current', 'index_policy', settings['limits']['max_fail'])
           end
@@ -904,8 +909,8 @@ local function get_index_policy(cfg, ev_base, upstream, host, policy_url, index_
       end
     else
       rspamd_logger.errx(rspamd_config,
-          'cannot get current elastic index policy (%s), status code: %s, response: %s',
-          policy_url, code, body)
+        'cannot get current elastic index policy (%s), status code: %s, response: %s',
+        policy_url, code, body)
       handle_error('get current', 'index_policy', settings['limits']['max_fail'])
       upstream:fail()
     end
@@ -1032,7 +1037,7 @@ local function configure_index_policy(cfg, ev_base)
       }
       index_policy['policy']['phases']['delete'] = delete_obj
     end
-  -- opensearch state policy with hot state
+    -- opensearch state policy with hot state
   elseif detected_distro['name'] == 'opensearch' then
     local retry = {
       count = 3,
@@ -1376,7 +1381,7 @@ local function configure_index_template(cfg, ev_base)
       upstream:ok()
     else
       rspamd_logger.errx(rspamd_config, 'cannot configure elastic index template (%s), status code: %s, response: %s',
-          template_url, code, body)
+        template_url, code, body)
       upstream:fail()
       handle_error('configure', 'index_template', settings['limits']['max_fail'])
     end
@@ -1419,7 +1424,8 @@ local function verify_distro(manual)
   local supported_distro_info = supported_distro[detected_distro_name]
   -- check that detected_distro_version is valid
   if not detected_distro_version or type(detected_distro_version) ~= 'string' then
-    rspamd_logger.errx(rspamd_config, 'elastic version should be a string, but we received: %s', type(detected_distro_version))
+    rspamd_logger.errx(rspamd_config, 'elastic version should be a string, but we received: %s',
+      type(detected_distro_version))
     valid = false
   elseif detected_distro_version == '' then
     rspamd_logger.errx(rspamd_config, 'unsupported elastic version: empty string')
@@ -1429,18 +1435,20 @@ local function verify_distro(manual)
     local cmp_from = compare_versions(detected_distro_version, supported_distro_info['from'])
     if cmp_from == -1 then
       rspamd_logger.errx(rspamd_config, 'unsupported elastic version: %s, minimal supported version of %s is %s',
-          detected_distro_version, detected_distro_name, supported_distro_info['from'])
+        detected_distro_version, detected_distro_name, supported_distro_info['from'])
       valid = false
     else
       local cmp_till = compare_versions(detected_distro_version, supported_distro_info['till'])
       if (cmp_till >= 0) and not supported_distro_info['till_unknown'] then
-        rspamd_logger.errx(rspamd_config, 'unsupported elastic version: %s, maximum supported version of %s is less than %s',
-            detected_distro_version, detected_distro_name, supported_distro_info['till'])
+        rspamd_logger.errx(rspamd_config,
+          'unsupported elastic version: %s, maximum supported version of %s is less than %s',
+          detected_distro_version, detected_distro_name, supported_distro_info['till'])
         valid = false
       elseif (cmp_till >= 0) and supported_distro_info['till_unknown'] then
         rspamd_logger.warnx(rspamd_config,
-          'compatibility of elastic version: %s is unknown, maximum known supported version of %s is less than %s, use at your own risk',
-          detected_distro_version, detected_distro_name, supported_distro_info['till'])
+          'compatibility of elastic version: %s is unknown, maximum known supported version of %s is less than %s,' ..
+          'use at your own risk',
+          detected_distro_version, detected_distro_name, supported_distro_info['till'])
         valid_unknown = true
       end
     end
@@ -1452,14 +1460,14 @@ local function verify_distro(manual)
   else
     if valid and manual then
       rspamd_logger.infox(
-          rspamd_config, 'assuming elastic distro: %s, version: %s', detected_distro_name, detected_distro_version)
+        rspamd_config, 'assuming elastic distro: %s, version: %s', detected_distro_name, detected_distro_version)
       detected_distro['supported'] = true
     elseif valid and not manual then
       rspamd_logger.infox(rspamd_config, 'successfully connected to elastic distro: %s, version: %s',
-          detected_distro_name, detected_distro_version)
+        detected_distro_name, detected_distro_version)
       detected_distro['supported'] = true
     else
-      handle_error('configure','distro',settings['version']['autodetect_max_fail'])
+      handle_error('configure', 'distro', settings['version']['autodetect_max_fail'])
     end
   end
 end
@@ -1468,7 +1476,8 @@ local function configure_distro(cfg, ev_base)
   if not settings['version']['autodetect_enabled'] then
     detected_distro['name'] = settings['version']['override']['name']
     detected_distro['version'] = settings['version']['override']['version']
-    rspamd_logger.infox(rspamd_config, 'automatic detection of elastic distro and version is disabled, taking configuration from settings')
+    rspamd_logger.infox(rspamd_config,
+      'automatic detection of elastic distro and version is disabled, taking configuration from settings')
     verify_distro(true)
   end
 
@@ -1481,7 +1490,8 @@ local function configure_distro(cfg, ev_base)
       rspamd_logger.errx(rspamd_config, 'cannot connect to elastic (%s): %s', root_url, err)
       upstream:fail()
     elseif code ~= 200 then
-      rspamd_logger.errx(rspamd_config, 'cannot connect to elastic (%s), status code: %s, response: %s', root_url, code, body)
+      rspamd_logger.errx(rspamd_config, 'cannot connect to elastic (%s), status code: %s, response: %s', root_url, code,
+        body)
       upstream:fail()
     else
       local parser = ucl.parser()
@@ -1492,10 +1502,10 @@ local function configure_distro(cfg, ev_base)
       else
         local obj = parser:get_object()
         if obj['tagline'] == "The OpenSearch Project: https://opensearch.org/" then
-            detected_distro['name'] = 'opensearch'
+          detected_distro['name'] = 'opensearch'
         end
         if obj['tagline'] == "You Know, for Search" then
-            detected_distro['name'] = 'elastic'
+          detected_distro['name'] = 'elastic'
         end
         if obj['version'] then
           if obj['version']['number'] then
@@ -1537,7 +1547,7 @@ end
 local opts = rspamd_config:get_all_opt('elastic')
 
 if opts then
-  for k,v in pairs(opts) do
+  for k, v in pairs(opts) do
     settings[k] = v
   end
@@ -1574,7 +1584,7 @@ if opts then
   rspamd_config:register_finish_script(function(task)
     local nlogs_total = buffer['logs']:length()
     if nlogs_total > 0 then
-      rspamd_logger.debugm(N, task, 'flushing buffer on shutdown, buffer size: %s', nlogs_total)
+      lua_util.debugm(N, task, 'flushing buffer on shutdown, buffer size: %s', nlogs_total)
       elastic_send_data(true, task)
     end
   end)
diff --git a/src/plugins/lua/external_services.lua b/src/plugins/lua/external_services.lua
index e299d9faa..307218d94 100644
--- a/src/plugins/lua/external_services.lua
+++ b/src/plugins/lua/external_services.lua
@@ -274,7 +274,7 @@ if opts and type(opts) == 'table' then
         for _, p in ipairs(m['patterns']) do
           if type(p) == 'table' then
             for sym in pairs(p) do
-              rspamd_logger.debugm(N, rspamd_config, 'registering: %1', {
+              lua_util.debugm(N, rspamd_config, 'registering: %1', {
                 type = 'virtual',
                 name = sym,
                 parent = m['symbol'],
@@ -307,7 +307,7 @@ if opts and type(opts) == 'table' then
         for _, p in ipairs(m['patterns_fail']) do
           if type(p) == 'table' then
             for sym in pairs(p) do
-              rspamd_logger.debugm(N, rspamd_config, 'registering: %1', {
+              lua_util.debugm(N, rspamd_config, 'registering: %1', {
                 type = 'virtual',
                 name = sym,
                 parent = m['symbol'],
diff --git a/src/plugins/lua/neural.lua b/src/plugins/lua/neural.lua
index f3b26f11a..ea40fc4f7 100644
--- a/src/plugins/lua/neural.lua
+++ b/src/plugins/lua/neural.lua
@@ -687,31 +687,31 @@ local function maybe_train_existing_ann(worker, ev_base, rule, set, profiles)
         return l >= rule.train.max_trains * (1.0 - rule.train.classes_bias)
       end
       if max_len >= rule.train.max_trains and fun.all(len_bias_check_pred, lens) then
-        rspamd_logger.debugm(N, rspamd_config,
+        lua_util.debugm(N, rspamd_config,
           'can start ANN %s learn as it has %s learn vectors; %s required, after checking %s vectors',
           ann_key, lens, rule.train.max_trains, what)
         cont_cb()
       else
-        rspamd_logger.debugm(N, rspamd_config,
+        lua_util.debugm(N, rspamd_config,
          'cannot learn ANN %s now: there are not enough %s learn vectors (has %s vectors; %s required)',
          ann_key, what, lens, rule.train.max_trains)
       end
     else
       -- Probabilistic mode, just ensure that at least one vector is okay
       if min_len > 0 and max_len >= rule.train.max_trains then
-        rspamd_logger.debugm(N, rspamd_config,
+        lua_util.debugm(N, rspamd_config,
          'can start ANN %s learn as it has %s learn vectors; %s required, after checking %s vectors',
          ann_key, lens, rule.train.max_trains, what)
         cont_cb()
       else
-        rspamd_logger.debugm(N, rspamd_config,
+        lua_util.debugm(N, rspamd_config,
          'cannot learn ANN %s now: there are not enough %s learn vectors (has %s vectors; %s required)',
          ann_key, what, lens, rule.train.max_trains)
       end
     end
   else
-    rspamd_logger.debugm(N, rspamd_config,
+    lua_util.debugm(N, rspamd_config,
      'checked %s vectors in ANN %s: %s vectors; %s required, need to check other class vectors',
      what, ann_key, ntrains, rule.train.max_trains)
     cont_cb()
diff --git a/src/plugins/lua/p0f.lua b/src/plugins/lua/p0f.lua
index 97757c23a..727e6d1bb 100644
--- a/src/plugins/lua/p0f.lua
+++ b/src/plugins/lua/p0f.lua
@@ -105,7 +105,7 @@ if rule then
   end
 
   for sym in pairs(rule.patterns) do
-    rspamd_logger.debugm(N, rspamd_config, 'registering: %1', {
+    lua_util.debugm(N, rspamd_config, 'registering: %1', {
       type = 'virtual',
       name = sym,
       parent = id,
diff --git a/src/plugins/lua/phishing.lua b/src/plugins/lua/phishing.lua
index 05e08c0f4..3f5c9e634 100644
--- a/src/plugins/lua/phishing.lua
+++ b/src/plugins/lua/phishing.lua
@@ -71,7 +71,7 @@ end
 local function phishing_cb(task)
   local function check_phishing_map(table)
     local phishing_data = {}
-    for k,v in pairs(table) do
+    for k, v in pairs(table) do
       phishing_data[k] = v
     end
     local url = phishing_data.url
@@ -89,6 +89,8 @@ local function phishing_cb(task)
       local data = nil
 
       if elt then
+        lua_util.debugm(N, task, 'found host element: %s',
+          host)
         local path = url:get_path()
         local query = url:get_query()
 
@@ -156,7 +158,7 @@ local function phishing_cb(task)
   local function check_phishing_dns(table)
     local phishing_data = {}
-    for k,v in pairs(table) do
+    for k, v in pairs(table) do
       phishing_data[k] = v
     end
     local url = phishing_data.url
@@ -192,7 +194,7 @@ local function phishing_cb(task)
       end
 
       local to_resolve_hp = compose_dns_query({ host, path })
-      rspamd_logger.debugm(N, task, 'try to resolve {%s, %s} -> %s',
+      lua_util.debugm(N, task, 'try to resolve {%s, %s} -> %s',
        host, path, to_resolve_hp)
       r:resolve_txt({
         task = task,
@@ -207,7 +209,7 @@ local function phishing_cb(task)
       end
 
       local to_resolve_hpq = compose_dns_query({ host, path, query })
-      rspamd_logger.debugm(N, task, 'try to resolve {%s, %s, %s} -> %s',
+      lua_util.debugm(N, task, 'try to resolve {%s, %s, %s} -> %s',
        host, path, query, to_resolve_hpq)
       r:resolve_txt({
         task = task,
@@ -256,9 +258,14 @@ local function phishing_cb(task)
     end
 
     if url:is_phished() then
+      local surl = tostring(url)
       local purl
 
+      lua_util.debugm(N, task, 'found phished url: %s',
+        surl)
       if url:is_redirected() then
+        lua_util.debugm(N, task, 'url %s is also been redirected',
+          surl)
         local rspamd_url = require "rspamd_url"
         -- Examine the real redirect target instead of the url
         local redirected_url = url:get_redirected()
@@ -268,6 +275,7 @@ local function phishing_cb(task)
           purl = rspamd_url.create(task:get_mempool(), url:get_visible())
 
         url = redirected_url
+        surl = string.format("redirected(%s)", tostring(url))
       else
         purl = url:get_phished()
       end
@@ -276,6 +284,10 @@ local function phishing_cb(task)
         return
       end
 
+      local spurl = tostring(purl)
+      lua_util.debugm(N, task, 'processing pair %s -> %s',
+        surl, spurl)
+
       local tld = url:get_tld()
       local ptld = purl:get_tld()
 
@@ -308,10 +320,11 @@ local function phishing_cb(task)
         local weight = 1.0
         local spoofed, why = util.is_utf_spoofed(tld, ptld)
         if spoofed then
-          lua_util.debugm(N, task, "confusable: %1 -> %2: %3", tld, ptld, why)
+          lua_util.debugm(N, task, "confusable: %s -> %s: %s", tld, ptld, why)
           weight = 1.0
         else
-          local dist = util.levenshtein_distance(stripped_tld, stripped_ptld, 2)
+          local dist = (stripped_tld == stripped_ptld) and 0
+              or util.levenshtein_distance(stripped_tld, stripped_ptld, 2)
           dist = 2 * dist / (#stripped_tld + #stripped_ptld)
 
           if dist > 0.3 and dist <= 1.0 then
@@ -330,15 +343,18 @@ local function phishing_cb(task)
             if a1 ~= a2 then
               weight = 1
-              lua_util.debugm(N, task, "confusable: %1 -> %2: different characters",
-                tld, ptld, why)
+              lua_util.debugm(N, task, "confusable: %s -> %s: different characters",
+                tld, ptld)
             else
               -- We have totally different strings in tld, so penalize it somehow
               weight = 0.5
             end
+          elseif dist == 0 then
+            -- Same domains, not phishing!
+            weight = 0.0
           end
 
-          lua_util.debugm(N, task, "distance: %1 -> %2: %3", tld, ptld, dist)
+          lua_util.debugm(N, task, "distance: %s -> %s: %s; weight = %s", tld, ptld, dist, weight)
         end
 
         local function is_url_in_map(map, furl)
@@ -368,15 +384,17 @@ local function phishing_cb(task)
           end
         end
 
-        found_in_map(strict_domains_maps, purl, 1.0)
-        if not found_in_map(anchor_exceptions_maps) then
-          if not found_in_map(phishing_exceptions_maps, purl, 1.0) then
-            if domains then
-              if is_url_in_map(domains, purl) then
+        if weight > 0 then
+          found_in_map(strict_domains_maps, purl, 1.0)
+          if not found_in_map(anchor_exceptions_maps) then
+            if not found_in_map(phishing_exceptions_maps, purl, 1.0) then
+              if domains then
+                if is_url_in_map(domains, purl) then
+                  task:insert_result(symbol, weight, ptld .. '->' .. tld)
+                end
+              else
                 task:insert_result(symbol, weight, ptld .. '->' .. tld)
               end
-            else
-              task:insert_result(symbol, weight, ptld .. '->' .. tld)
             end
           end
         end
diff --git a/src/plugins/lua/reputation.lua b/src/plugins/lua/reputation.lua
index a3af26c91..bd7d91932 100644
--- a/src/plugins/lua/reputation.lua
+++ b/src/plugins/lua/reputation.lua
@@ -955,7 +955,7 @@ local function reputation_redis_init(rule, cfg, ev_base, worker)
   local get_script = lua_util.jinja_template(redis_get_script_tpl,
       { windows = rule.backend.config.buckets })
-  rspamd_logger.debugm(N, rspamd_config, 'added extraction script %s', get_script)
+  lua_util.debugm(N, rspamd_config, 'added extraction script %s', get_script)
   rule.backend.script_get = lua_redis.add_redis_script(get_script, our_redis_params)
 
   -- Redis script to update Redis buckets
@@ -1003,7 +1003,7 @@ local function reputation_redis_init(rule, cfg, ev_base, worker)
   local set_script = lua_util.jinja_template(redis_adaptive_emea_script_tpl,
       { windows = rule.backend.config.buckets })
-  rspamd_logger.debugm(N, rspamd_config, 'added emea update script %s', set_script)
+  lua_util.debugm(N, rspamd_config, 'added emea update script %s', set_script)
   rule.backend.script_set = lua_redis.add_redis_script(set_script, our_redis_params)
 
   return true
diff --git a/src/plugins/lua/url_redirector.lua b/src/plugins/lua/url_redirector.lua
index 10b5fb255..c1fa85cae 100644
--- a/src/plugins/lua/url_redirector.lua
+++ b/src/plugins/lua/url_redirector.lua
@@ -182,7 +182,7 @@ local function resolve_cached(task, orig_url, url, key, ntries)
   local function resolve_url()
     if ntries > settings.nested_limit then
       -- We cannot resolve more, stop
-      rspamd_logger.debugm(N, task, 'cannot get more requests to resolve %s, stop on %s after %s attempts',
+      lua_util.debugm(N, task, 'cannot get more requests to resolve %s, stop on %s after %s attempts',
        orig_url, url, ntries)
       cache_url(task, orig_url, url, key, 'nested')
       local str_orig_url = tostring(orig_url)
@@ -223,7 +223,7 @@ local function resolve_cached(task, orig_url, url, key, ntries)
       if loc then
         redir_url = rspamd_url.create(task:get_mempool(), loc)
       end
-      rspamd_logger.debugm(N, task, 'found redirect from %s to %s, err code %s',
+      lua_util.debugm(N, task, 'found redirect from %s to %s, err code %s',
        orig_url, loc, code)
 
       if redir_url then
@@ -239,11 +239,11 @@ local function resolve_cached(task, orig_url, url, key, ntries)
           resolve_cached(task, orig_url, redir_url, key, ntries + 1)
         end
       else
-        rspamd_logger.debugm(N, task, "no location, headers: %s", headers)
+        lua_util.debugm(N, task, "no location, headers: %s", headers)
         cache_url(task, orig_url, url, key)
       end
     else
-      rspamd_logger.debugm(N, task, 'found redirect error from %s to %s, err code: %s',
+      lua_util.debugm(N, task, 'found redirect error from %s to %s, err code: %s',
        orig_url, url, code)
       cache_url(task, orig_url, url, key)
     end
@@ -278,7 +278,7 @@ local function resolve_cached(task, orig_url, url, key, ntries)
   if type(data) == 'string' then
     if data ~= 'processing' then
       -- Got cached result
-      rspamd_logger.debugm(N, task, 'found cached redirect from %s to %s',
+      lua_util.debugm(N, task, 'found cached redirect from %s to %s',
        url, data)
       if data:sub(1, 1) == '^' then
         -- Prefixed url stored
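Beyond the phishing and ICU changes, most of the remaining hunks swap plugin debug logging from `rspamd_logger.debugm` over to the `lua_util.debugm` helper. A short sketch of the calling pattern the diff converges on (assumes an rspamd runtime; the module name and message are illustrative):

```lua
local lua_util = require "lua_util"

local N = 'phishing' -- module name attached to the debug messages

local function log_pair(task, from, to)
  -- placeholders are expanded by rspamd's logger
  lua_util.debugm(N, task, 'processing pair %s -> %s', from, to)
end
```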