diff options
Diffstat (limited to 'src/plugins/lua/elastic.lua')
-rw-r--r-- | src/plugins/lua/elastic.lua | 172 |
1 files changed, 72 insertions, 100 deletions
diff --git a/src/plugins/lua/elastic.lua b/src/plugins/lua/elastic.lua index 8bed9fcf4..b26fbd8e8 100644 --- a/src/plugins/lua/elastic.lua +++ b/src/plugins/lua/elastic.lua @@ -19,6 +19,7 @@ local rspamd_logger = require 'rspamd_logger' local rspamd_http = require "rspamd_http" local lua_util = require "lua_util" local rspamd_util = require "rspamd_util" +local rspamd_text = require "rspamd_text" local ucl = require "ucl" local upstream_list = require "rspamd_upstream_list" @@ -287,7 +288,7 @@ end local function handle_error(action, component, limit) if states[component]['errors'] >= limit then rspamd_logger.errx(rspamd_config, 'cannot %s elastic %s, failed attempts: %s/%s, stop trying', - action, component:gsub('_', ' '), states[component]['errors'], limit) + action, component:gsub('_', ' '), states[component]['errors'], limit) states[component]['configured'] = true else states[component]['errors'] = states[component]['errors'] + 1 @@ -315,54 +316,6 @@ local function get_received_delay(received_headers) return delay end -local function is_empty(str) - -- define a pattern that includes invisible unicode characters - local str_cleared = str:gsub('[' .. - '\xC2\xA0' .. -- U+00A0 non-breaking space - '\xE2\x80\x8B' .. -- U+200B zero width space - '\xEF\xBB\xBF' .. -- U+FEFF byte order mark (zero width no-break space) - '\xE2\x80\x8C' .. -- U+200C zero width non-joiner - '\xE2\x80\x8D' .. -- U+200D zero width joiner - '\xE2\x80\x8E' .. -- U+200E left-to-right mark - '\xE2\x80\x8F' .. -- U+200F right-to-left mark - '\xE2\x81\xA0' .. -- U+2060 word joiner - '\xE2\x80\xAA' .. -- U+202A left-to-right embedding - '\xE2\x80\xAB' .. -- U+202B right-to-left embedding - '\xE2\x80\xAC' .. -- U+202C pop directional formatting - '\xE2\x80\xAD' .. -- U+202D left-to-right override - '\xE2\x80\xAE' .. -- U+202E right-to-left override - '\xE2\x81\x9F' .. -- U+2061 function application - '\xE2\x81\xA1' .. -- U+2061 invisible separator - '\xE2\x81\xA2' .. -- U+2062 invisible times - '\xE2\x81\xA3' .. -- U+2063 invisible separator - '\xE2\x81\xA4' .. -- U+2064 invisible plus - ']', '') -- gsub replaces all matched characters with an empty string - if str_cleared:match('[%S]') then - return false - else - return true - end -end - -local function fill_empty_strings(tbl, empty_value) - local filled_tbl = {} - for key, value in pairs(tbl) do - if value and type(value) == 'table' then - local nested_filtered = fill_empty_strings(value, empty_value) - if next(nested_filtered) ~= nil then - filled_tbl[key] = nested_filtered - end - elseif type(value) == 'boolean' then - filled_tbl[key] = value - elseif value and type(value) == 'string' and is_empty(value) then - filled_tbl[key] = empty_value - elseif value then - filled_tbl[key] = value - end - end - return filled_tbl -end - local function create_bulk_json(es_index, logs_to_send) local tbl = {} for _, row in pairs(logs_to_send) do @@ -407,15 +360,17 @@ local function elastic_send_data(flush_all, task, cfg, ev_base) local function http_callback(err, code, body, _) local push_done = false if err then - rspamd_logger.errx(log_object, 'cannot send logs to elastic (%s): %s; failed attempts: %s/%s', - push_url, err, buffer['errors'], settings['limits']['max_fail']) + rspamd_logger.errx(log_object, + 'cannot send logs to elastic (%s): %s; failed attempts: %s/%s', + push_url, err, buffer['errors'], settings['limits']['max_fail']) elseif code == 200 then local parser = ucl.parser() local res, ucl_err = parser:parse_string(body) if not ucl_err and res then local obj = parser:get_object() push_done = true - lua_util.debugm(N, log_object, 'successfully sent payload with %s logs', nlogs_to_send) + lua_util.debugm(N, log_object, + 'successfully sent payload with %s logs', nlogs_to_send) if obj['errors'] then for _, value in pairs(obj['items']) do if value['index'] and value['index']['status'] >= 400 then @@ -424,15 +379,15 @@ local function elastic_send_data(flush_all, task, cfg, ev_base) local error_type = safe_get(value, 'index', 'error', 'type') or '' local error_reason = safe_get(value, 'index', 'error', 'reason') or '' rspamd_logger.warnx(log_object, - 'error while pushing logs to elastic, status: %s, index: %s, type: %s, reason: %s', - status, index, error_type, error_reason) + 'error while pushing logs to elastic, status: %s, index: %s, type: %s, reason: %s', + status, index, error_type, error_reason) end end end else rspamd_logger.errx(log_object, - 'cannot parse response from elastic (%s): %s; failed attempts: %s/%s', - push_url, ucl_err, buffer['errors'], settings['limits']['max_fail']) + 'cannot parse response from elastic (%s): %s; failed attempts: %s/%s', + push_url, ucl_err, buffer['errors'], settings['limits']['max_fail']) end else rspamd_logger.errx(log_object, @@ -448,8 +403,8 @@ local function elastic_send_data(flush_all, task, cfg, ev_base) upstream:fail() if buffer['errors'] >= settings['limits']['max_fail'] then rspamd_logger.errx(log_object, - 'failed to send %s log lines, failed attempts: %s/%s, removing failed logs from bugger', - nlogs_to_send, buffer['errors'], settings['limits']['max_fail']) + 'failed to send %s log lines, failed attempts: %s/%s, removing failed logs from bugger', + nlogs_to_send, buffer['errors'], settings['limits']['max_fail']) buffer['logs']:pop_first(nlogs_to_send) buffer['errors'] = 0 else @@ -494,6 +449,7 @@ local function get_general_metadata(task) local empty = settings['index_template']['empty_value'] local user = task:get_user() r.rspamd_server = rspamd_hostname or empty + r.digest = task:get_digest() or empty r.action = task:get_metric_action() or empty r.score = task:get_metric_score()[1] or 0 @@ -565,8 +521,8 @@ local function get_general_metadata(task) if task:has_from('smtp') then local from = task:get_from({ 'smtp', 'orig' })[1] if from and - from['user'] and #from['user'] > 0 and - from['domain'] and #from['domain'] > 0 + from['user'] and #from['user'] > 0 and + from['domain'] and #from['domain'] > 0 then r.from_user = from['user'] r.from_domain = from['domain']:lower() @@ -578,8 +534,8 @@ local function get_general_metadata(task) if task:has_from('mime') then local mime_from = task:get_from({ 'mime', 'orig' })[1] if mime_from and - mime_from['user'] and #mime_from['user'] > 0 and - mime_from['domain'] and #mime_from['domain'] > 0 + mime_from['user'] and #mime_from['user'] > 0 and + mime_from['domain'] and #mime_from['domain'] > 0 then r.mime_from_user = mime_from['user'] r.mime_from_domain = mime_from['domain']:lower() @@ -608,25 +564,34 @@ local function get_general_metadata(task) local function process_header(name) local hdr = task:get_header_full(name) - local headers_text_ignore_above = settings['index_template']['headers_text_ignore_above'] - 3 if hdr and #hdr > 0 then local l = {} for _, h in ipairs(hdr) do - if settings['index_template']['headers_count_ignore_above'] ~= 0 and - #l >= settings['index_template']['headers_count_ignore_above'] + if settings['index_template']['headers_count_ignore_above'] > 0 and + #l >= settings['index_template']['headers_count_ignore_above'] then table.insert(l, 'ignored above...') break end local header - if settings['index_template']['headers_text_ignore_above'] ~= 0 and - h.decoded and #h.decoded >= headers_text_ignore_above - then - header = h.decoded:sub(1, headers_text_ignore_above) .. '...' - elseif h.decoded and #h.decoded > 0 then - header = h.decoded + local header_len + if h.decoded then + header = rspamd_text.fromstring(h.decoded) + header_len = header:len_utf8() else - header = empty + table.insert(l, empty) + break + end + if not header_len or header_len == 0 then + table.insert(l, empty) + break + end + if settings['index_template']['headers_text_ignore_above'] > 0 and + header_len >= settings['index_template']['headers_text_ignore_above'] + then + header = header:sub_utf8(1, settings['index_template']['headers_text_ignore_above']) + table.insert(l, header .. rspamd_text.fromstring('...')) + break end table.insert(l, header) end @@ -686,7 +651,7 @@ local function get_general_metadata(task) r.received_delay = get_received_delay(task:get_received_headers()) - return fill_empty_strings(r, empty) + return r end local function elastic_collect(task) @@ -773,8 +738,8 @@ local function configure_geoip_pipeline(cfg, ev_base) upstream:ok() else rspamd_logger.errx(rspamd_config, - 'cannot configure elastic geoip pipeline (%s), status code: %s, response: %s', - geoip_url, code, body) + 'cannot configure elastic geoip pipeline (%s), status code: %s, response: %s', + geoip_url, code, body) upstream:fail() handle_error('configure', 'geoip_pipeline', settings['limits']['max_fail']) end @@ -810,8 +775,9 @@ local function put_index_policy(cfg, ev_base, upstream, host, policy_url, index_ states['index_policy']['configured'] = true upstream:ok() else - rspamd_logger.errx(rspamd_config, 'cannot configure elastic index policy (%s), status code: %s, response: %s', - policy_url, code, body) + rspamd_logger.errx(rspamd_config, + 'cannot configure elastic index policy (%s), status code: %s, response: %s', + policy_url, code, body) upstream:fail() handle_error('configure', 'index_policy', settings['limits']['max_fail']) end @@ -867,7 +833,7 @@ local function get_index_policy(cfg, ev_base, upstream, host, policy_url, index_ if not lua_util.table_cmp(our_policy['policy']['default_state'], current_default_state) then update_needed = true elseif not lua_util.table_cmp(our_policy['policy']['ism_template'][1]['index_patterns'], - current_ism_index_patterns) then + current_ism_index_patterns) then update_needed = true elseif not lua_util.table_cmp(our_policy['policy']['states'], current_states) then update_needed = true @@ -890,8 +856,8 @@ local function get_index_policy(cfg, ev_base, upstream, host, policy_url, index_ put_index_policy(cfg, ev_base, upstream, host, policy_url, index_policy_json) else rspamd_logger.errx(rspamd_config, - 'current elastic index policy (%s) not returned correct seq_no/primary_term, policy will not be updated, response: %s', - policy_url, body) + 'current elastic index policy (%s) not returned correct seq_no/primary_term, policy will not be updated, response: %s', + policy_url, body) upstream:fail() handle_error('validate current', 'index_policy', settings['limits']['max_fail']) end @@ -909,8 +875,8 @@ local function get_index_policy(cfg, ev_base, upstream, host, policy_url, index_ end else rspamd_logger.errx(rspamd_config, - 'cannot get current elastic index policy (%s), status code: %s, response: %s', - policy_url, code, body) + 'cannot get current elastic index policy (%s), status code: %s, response: %s', + policy_url, code, body) handle_error('get current', 'index_policy', settings['limits']['max_fail']) upstream:fail() end @@ -1037,7 +1003,7 @@ local function configure_index_policy(cfg, ev_base) } index_policy['policy']['phases']['delete'] = delete_obj end - -- opensearch state policy with hot state + -- opensearch state policy with hot state elseif detected_distro['name'] == 'opensearch' then local retry = { count = 3, @@ -1313,6 +1279,7 @@ local function configure_index_template(cfg, ev_base) type = 'object', properties = { rspamd_server = t_keyword, + digest = t_keyword, action = t_keyword, score = t_double, symbols = symbols_obj, @@ -1381,7 +1348,7 @@ local function configure_index_template(cfg, ev_base) upstream:ok() else rspamd_logger.errx(rspamd_config, 'cannot configure elastic index template (%s), status code: %s, response: %s', - template_url, code, body) + template_url, code, body) upstream:fail() handle_error('configure', 'index_template', settings['limits']['max_fail']) end @@ -1424,8 +1391,9 @@ local function verify_distro(manual) local supported_distro_info = supported_distro[detected_distro_name] -- check that detected_distro_version is valid if not detected_distro_version or type(detected_distro_version) ~= 'string' then - rspamd_logger.errx(rspamd_config, 'elastic version should be a string, but we received: %s', - type(detected_distro_version)) + rspamd_logger.errx(rspamd_config, + 'elastic version should be a string, but we received: %s', + type(detected_distro_version)) valid = false elseif detected_distro_version == '' then rspamd_logger.errx(rspamd_config, 'unsupported elastic version: empty string') @@ -1434,21 +1402,22 @@ local function verify_distro(manual) -- compare versions using compare_versions local cmp_from = compare_versions(detected_distro_version, supported_distro_info['from']) if cmp_from == -1 then - rspamd_logger.errx(rspamd_config, 'unsupported elastic version: %s, minimal supported version of %s is %s', - detected_distro_version, detected_distro_name, supported_distro_info['from']) + rspamd_logger.errx(rspamd_config, + 'unsupported elastic version: %s, minimal supported version of %s is %s', + detected_distro_version, detected_distro_name, supported_distro_info['from']) valid = false else local cmp_till = compare_versions(detected_distro_version, supported_distro_info['till']) if (cmp_till >= 0) and not supported_distro_info['till_unknown'] then rspamd_logger.errx(rspamd_config, - 'unsupported elastic version: %s, maximum supported version of %s is less than %s', - detected_distro_version, detected_distro_name, supported_distro_info['till']) + 'unsupported elastic version: %s, maximum supported version of %s is less than %s', + detected_distro_version, detected_distro_name, supported_distro_info['till']) valid = false elseif (cmp_till >= 0) and supported_distro_info['till_unknown'] then rspamd_logger.warnx(rspamd_config, - 'compatibility of elastic version: %s is unknown, maximum known supported version of %s is less than %s,' .. - 'use at your own risk', - detected_distro_version, detected_distro_name, supported_distro_info['till']) + 'compatibility of elastic version: %s is unknown, maximum known ' .. + 'supported version of %s is less than %s, use at your own risk', + detected_distro_version, detected_distro_name, supported_distro_info['till']) valid_unknown = true end end @@ -1460,11 +1429,12 @@ local function verify_distro(manual) else if valid and manual then rspamd_logger.infox( - rspamd_config, 'assuming elastic distro: %s, version: %s', detected_distro_name, detected_distro_version) + rspamd_config, 'assuming elastic distro: %s, version: %s', detected_distro_name, detected_distro_version) detected_distro['supported'] = true elseif valid and not manual then - rspamd_logger.infox(rspamd_config, 'successfully connected to elastic distro: %s, version: %s', - detected_distro_name, detected_distro_version) + rspamd_logger.infox(rspamd_config, + 'successfully connected to elastic distro: %s, version: %s', + detected_distro_name, detected_distro_version) detected_distro['supported'] = true else handle_error('configure', 'distro', settings['version']['autodetect_max_fail']) @@ -1477,7 +1447,7 @@ local function configure_distro(cfg, ev_base) detected_distro['name'] = settings['version']['override']['name'] detected_distro['version'] = settings['version']['override']['version'] rspamd_logger.infox(rspamd_config, - 'automatic detection of elastic distro and version is disabled, taking configuration from settings') + 'automatic detection of elastic distro and version is disabled, taking configuration from settings') verify_distro(true) end @@ -1490,14 +1460,16 @@ local function configure_distro(cfg, ev_base) rspamd_logger.errx(rspamd_config, 'cannot connect to elastic (%s): %s', root_url, err) upstream:fail() elseif code ~= 200 then - rspamd_logger.errx(rspamd_config, 'cannot connect to elastic (%s), status code: %s, response: %s', root_url, code, - body) + rspamd_logger.errx(rspamd_config, + 'cannot connect to elastic (%s), status code: %s, response: %s', + root_url, code, body) upstream:fail() else local parser = ucl.parser() local res, ucl_err = parser:parse_string(body) if not res then - rspamd_logger.errx(rspamd_config, 'failed to parse reply from elastic (%s): %s', root_url, ucl_err) + rspamd_logger.errx(rspamd_config, 'failed to parse reply from elastic (%s): %s', + root_url, ucl_err) upstream:fail() else local obj = parser:get_object() |