--[[
Copyright (c) 2021, Alexander Moisseev

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--

--[[[
-- @module cloudmark
-- This module contains Cloudmark v2 interface
--]]

local lua_util = require "lua_util"
local http = require "rspamd_http"
local upstream_list = require "rspamd_upstream_list"
local rspamd_logger = require "rspamd_logger"
local ucl = require "ucl"
local rspamd_util = require "rspamd_util"
local common = require "lua_scanners/common"
local fun = require "fun"
local lua_mime = require "lua_mime"

local N = 'cloudmark'
-- Boundary for multipart transfers, generated on module init
local static_boundary = rspamd_util.random_hex(32)

-- Builds the request URL for the given upstream address.
-- @param rule scanner rule (reads `url`, `default_port` and `use_https`)
-- @param addr upstream address object (must provide `get_port()`)
-- @param maybe_url optional path overriding `rule.url`
-- @return URL string of the form `scheme://addr:port/path`
local function cloudmark_url(rule, addr, maybe_url)
  local port = addr:get_port()
  local path = maybe_url or rule.url

  -- Port 0 means the address carries no explicit port: use the rule default
  if port == 0 then
    port = rule.default_port
  end

  local scheme = rule.use_https and 'https' or 'http'

  return string.format('%s://%s:%d%s', scheme, tostring(addr), port, path)
end

-- Detect cloudmark max size
-- Queries `/score/v2/max-message-size` on one upstream and stores the
-- reported `maxMessageSize` in `rule.max_size`.
-- NOTE(review): `rule.max_size` is not read in this file; presumably it is
-- consumed by the shared scanner code - confirm.
local function cloudmark_preload(rule, cfg, ev_base, _)
  local upstream = rule.upstreams:get_upstream_round_robin()
  local addr = upstream:get_addr()

  local function max_message_size_cb(http_err, code, body, _)
    if http_err then
      rspamd_logger.errx(ev_base, 'HTTP error when getting max message size: %s',
          http_err)
      return
    end
    if code ~= 200 then
      rspamd_logger.errx(ev_base, 'bad HTTP code when getting max message size: %s', code)
      -- An error reply does not carry the limit, so there is nothing to parse
      return
    end

    local parser = ucl.parser()
    local ret, err = parser:parse_string(body)
    if not ret then
      rspamd_logger.errx(ev_base, 'could not parse response body [%s]: %s', body, err)
      return
    end

    local obj = parser:get_object()
    -- The reply may be valid JSON without being an object
    local ms = type(obj) == 'table' and obj.maxMessageSize or nil
    if not ms then
      rspamd_logger.errx(ev_base, 'missing maxMessageSize in the response body (JSON): %s', obj)
      return
    end

    rule.max_size = ms
    lua_util.debugm(N, cfg, 'set maximum message size set to %s bytes', ms)
  end

  http.request({
    ev_base = ev_base,
    config = cfg,
    url = cloudmark_url(rule, addr, '/score/v2/max-message-size'),
    callback = max_message_size_cb,
  })
end

-- Converts, in place, every key of `d` that `tonumber` accepts into the
-- corresponding numeric key. Keys that are already numbers are left alone.
local function numerify(d)
  -- Collect the keys first: new keys must not be added while traversing
  -- the table with `pairs`
  local keys = {}
  for k in pairs(d) do
    keys[#keys + 1] = k
  end

  for _, k in ipairs(keys) do
    local new_key = tonumber(k)
    -- Skip keys that are already numeric: moving a value onto its own key
    -- and then clearing that key would delete the entry
    if new_key and new_key ~= k then
      d[new_key] = d[k]
      d[k] = nil
    end
  end
end

-- Builds the rule configuration from the defaults overridden by `opts`.
-- @param opts user supplied options
-- @return the rule table, or nil when no servers are defined or the server
--         list cannot be parsed
local function cloudmark_config(opts)

  local cloudmark_conf = {
    name = N,
    default_port = 2713,
    url = '/score/v2/message',
    use_https = false,
    timeout = 5.0,
    log_clean = false,
    retransmits = 1,
    score_threshold = 90, -- minimum score to consider a reply
    message = '${SCANNER}: spam message found: "${VIRUS}"',
    max_message = 0,
    detection_category = "hash",
    default_score = 1,
    action = false,
    log_spamcause = true,
    symbol_fail = 'CLOUDMARK_FAIL',
    symbol = 'CLOUDMARK_CHECK',
    symbol_spam = 'CLOUDMARK_SPAM',
    add_score_header = false, -- Add X-CMAE-Score header
    add_headers = false, -- allow addition of the headers from Cloudmark
    scores_symbols = nil, -- a table with match { [score_threshold] = symbol, ... }
  }

  cloudmark_conf = lua_util.override_defaults(cloudmark_conf, opts)

  -- Thresholds are compared numerically later on, so make the keys numbers
  if type(cloudmark_conf.scores_symbols) == 'table' then
    numerify(cloudmark_conf.scores_symbols)
  end

  if not cloudmark_conf.prefix then
    cloudmark_conf.prefix = 'rs_' .. cloudmark_conf.name .. '_'
  end

  if not cloudmark_conf.log_prefix then
    -- Fall back to the name when no type is configured instead of failing
    -- on a nil index
    local conf_type = cloudmark_conf.type or cloudmark_conf.name
    if cloudmark_conf.name:lower() == conf_type:lower() then
      cloudmark_conf.log_prefix = cloudmark_conf.name
    else
      cloudmark_conf.log_prefix = cloudmark_conf.name .. ' (' .. conf_type .. ')'
    end
  end

  -- `socket` is accepted as a fallback for `servers`
  if not cloudmark_conf.servers and cloudmark_conf.socket then
    cloudmark_conf.servers = cloudmark_conf.socket
  end

  if not cloudmark_conf.servers then
    rspamd_logger.errx(rspamd_config, 'no servers defined')

    return nil
  end

  cloudmark_conf.upstreams = upstream_list.create(rspamd_config,
      cloudmark_conf.servers,
      cloudmark_conf.default_port)

  if cloudmark_conf.upstreams then
    cloudmark_conf.symbols = { { symbol = cloudmark_conf.symbol_spam, score = 5.0 } }
    cloudmark_conf.preloads = { cloudmark_preload }
    lua_util.add_debug_alias('external_services', cloudmark_conf.name)
    return cloudmark_conf
  end

  rspamd_logger.errx(rspamd_config, 'cannot parse servers %s',
      cloudmark_conf['servers'])
  return nil
end

-- Converts a key-value map to the table representing multipart body, with the following values:
-- `data`: data of the part
-- `filename`: optional filename
-- `content-type`: content type of the element (optional)
-- `content-transfer-encoding`: optional CTE header
-- Entries that are not tables, or that have no `data`, are skipped.
-- @param tbl map of part name -> part description
-- @param boundary multipart boundary (without the leading `--`)
-- @return array of body chunks; empty when no part carries data
local function table_to_multipart_body(tbl, boundary)
  local seen_data = false
  local out = {}

  for k, v in pairs(tbl) do
    if type(v) == 'table' and v.data then
      seen_data = true
      out[#out + 1] = string.format('--%s\r\n', boundary)

      if v.filename then
        out[#out + 1] = string.format('Content-Disposition: form-data; name="%s"; filename="%s"\r\n',
            k, v.filename)
      else
        out[#out + 1] = string.format('Content-Disposition: form-data; name="%s"\r\n', k)
      end

      if v['content-type'] then
        out[#out + 1] = string.format('Content-Type: %s\r\n', v['content-type'])
      else
        out[#out + 1] = 'Content-Type: text/plain\r\n'
      end

      if v['content-transfer-encoding'] then
        out[#out + 1] = string.format('Content-Transfer-Encoding: %s\r\n',
            v['content-transfer-encoding'])
      else
        out[#out + 1] = 'Content-Transfer-Encoding: binary\r\n'
      end

      out[#out + 1] = '\r\n'
      out[#out + 1] = v.data
      out[#out + 1] = '\r\n'
    end
  end

  -- The closing delimiter is emitted only when at least one part was written
  if seen_data then
    out[#out + 1] = string.format('--%s--\r\n', boundary)
  end

  return out
end

-- Selects the symbol registered for the highest threshold that does not
-- exceed `score`.
-- @param scores_symbols map { [threshold] = symbol, ... } with numeric keys
-- @param score numeric score
-- @return the symbol, or nil when no threshold is reached
local function get_specific_symbol(scores_symbols, score)
  local selected
  local sel_thr = -1

  for threshold, sym in pairs(scores_symbols) do
    if sel_thr < threshold and threshold <= score then
      selected = sym
      sel_thr = threshold
    end
  end

  return selected
end

-- Load-time self checks for the threshold selection
assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM' }, 100) == 'CLOUDMARK_SPAM')
assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM' }, 80) == nil)
assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }, 100) == 'CLOUDMARK_SPAM')
assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }, 80) == 'CLOUDMARK_SPAM2')
assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }, 70) == nil)
assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }, 90) == 'CLOUDMARK_SPAM')
assert(get_specific_symbol({ }, 80) == nil)
assert(get_specific_symbol({ [100] = 'CLOUDMARK_SPAM' }, 100) == 'CLOUDMARK_SPAM')
assert(get_specific_symbol({ [0] = 'CLOUDMARK_SPAM' }, 0) == 'CLOUDMARK_SPAM')

-- Parses the reply body and inserts the resulting symbols and headers.
-- @param task task being scanned
-- @param rule scanner rule
-- @param body raw HTTP response body
local function parse_cloudmark_reply(task, rule, body)
  local parser = ucl.parser()
  local ret, err = parser:parse_string(body)
  if not ret then
    rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
    task:insert_result(rule.symbol_fail, 1.0, 'Parser error: ' .. tostring(err))
    return
  end

  local obj = parser:get_object()
  lua_util.debugm(N, task, 'cloudmark reply is: %s', obj)

  -- The reply may be valid JSON without being an object
  if type(obj) ~= 'table' or not obj.score then
    rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
    task:insert_result(rule.symbol_fail, 1.0, 'Parser error: no score')
    return
  end

  if obj.analysis then
    -- Report analysis string
    rspamd_logger.infox(task, 'cloudmark report string: %s', obj.analysis)
  end

  local score = tonumber(obj.score) or 0
  if score >= rule.score_threshold then
    task:insert_result(rule.symbol_spam, 1.0, tostring(score))
  end

  if rule.add_headers and type(obj.appendHeaders) == 'table' then
    local headers_add = fun.tomap(fun.map(function(h)
      return h.headerField, {
        order = 1, value = h.body
      }
    end, obj.appendHeaders))
    lua_mime.modify_headers(task, {
      add = headers_add
    })
  end

  if rule.add_score_header then
    lua_mime.modify_headers(task, {
      add = {
        ['X-CMAE-Score'] = {
          order = 1, value = tostring(score)
        }
      }
    })
  end

  if type(rule.scores_symbols) == 'table' then
    local sym = get_specific_symbol(rule.scores_symbols, score)
    if sym then
      task:insert_result(sym, 1.0, tostring(score))
    end
  end
end

-- Scanner entry point: submits the message to an upstream and processes the
-- reply.
-- @param task task being scanned
-- @param content passed through to `common.condition_check_and_continue`
-- @param digest passed through to `common.condition_check_and_continue`
-- @param rule scanner rule produced by `cloudmark_config`
-- @param maybe_part passed through to `common.condition_check_and_continue`
local function cloudmark_check(task, content, digest, rule, maybe_part)
  local function cloudmark_check_uncached()
    local upstream = rule.upstreams:get_upstream_round_robin()
    local addr = upstream:get_addr()
    local retransmits = rule.retransmits

    local url = cloudmark_url(rule, addr)
    local message_data = task:get_content()
    if rule.max_message and rule.max_message > 0 and #message_data > rule.max_message then
      task:insert_result(rule['symbol_fail'], 0.0, 'Message too large: ' .. #message_data)
      return
    end

    -- Every field must be a table with a `data` member: anything else is
    -- skipped by `table_to_multipart_body` and never sent
    local request = {
      rfc822 = {
        -- Lower-case key: this is the one `table_to_multipart_body` reads
        ['content-type'] = 'message/rfc822',
        data = message_data,
      }
    }

    local helo = task:get_helo()
    if helo then
      request['heloDomain'] = {
        data = helo,
      }
    end

    local mail_from = task:get_from('smtp') or {}
    local sender = mail_from[1]
    if sender and sender.addr and #sender.addr > 1 then
      request['mailFrom'] = {
        data = sender.addr
      }
    end

    local rcpt_to = task:get_recipients('smtp')
    if rcpt_to then
      request['rcptTo'] = {
        data = table.concat(fun.totable(fun.map(function(r)
          return r.addr
        end, rcpt_to)), ',')
      }
    end

    local fip = task:get_from_ip()
    if fip and fip:is_valid() then
      request['connIp'] = {
        data = tostring(fip),
      }
    end

    local hostname = task:get_hostname()
    if hostname then
      request['fromHost'] = {
        data = hostname,
      }
    end

    local request_data = {
      task = task,
      url = url,
      body = table_to_multipart_body(request, static_boundary),
      headers = {
        ['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary)
      },
      timeout = rule.timeout,
    }

    local function cloudmark_callback(http_err, code, body, headers)

      local function cloudmark_requery()
        -- set current upstream to fail because an error occurred
        upstream:fail()

        -- retry with another upstream until retransmits exceeds
        if retransmits > 0 then

          retransmits = retransmits - 1

          lua_util.debugm(rule.name, task,
              '%s: request Error: %s - retries left: %s',
              rule.log_prefix, http_err, retransmits)

          -- Select a different upstream!
          upstream = rule.upstreams:get_upstream_round_robin()
          addr = upstream:get_addr()
          url = cloudmark_url(rule, addr)

          lua_util.debugm(rule.name, task, '%s: retry IP: %s:%s',
              rule.log_prefix, addr, addr:get_port())
          request_data.url = url

          http.request(request_data)
        else
          -- The upstream has already been marked as failed above, so the
          -- failure is not counted a second time here
          rspamd_logger.errx(task, '%s: failed to scan, maximum retransmits ' ..
              'exceed', rule.log_prefix)
          task:insert_result(rule['symbol_fail'], 0.0, 'failed to scan and ' ..
              'retransmits exceed')
        end
      end

      if http_err then
        cloudmark_requery()
      else
        -- Parse the response
        if upstream then
          upstream:ok()
        end
        if code ~= 200 then
          rspamd_logger.errx(task, 'invalid HTTP code: %s, body: %s, headers: %s', code, body, headers)
          task:insert_result(rule.symbol_fail, 1.0, 'Bad HTTP code: ' .. tostring(code))
          return
        end
        parse_cloudmark_reply(task, rule, body)
      end
    end

    request_data.callback = cloudmark_callback
    http.request(request_data)
  end

  -- When the shared helper returns a true value nothing more is done here;
  -- otherwise the scan is started directly.
  -- NOTE(review): the helper receives `cloudmark_check_uncached` and
  -- presumably invokes it itself in that case - confirm in lua_scanners/common
  if common.condition_check_and_continue(task, content, rule, digest,
      cloudmark_check_uncached, maybe_part) then
    return
  else
    cloudmark_check_uncached()
  end
end

return {
  type = { 'cloudmark', 'scanner' },
  description = 'Cloudmark cartridge interface',
  configure = cloudmark_config,
  check = cloudmark_check,
  name = N,
}