123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416 |
- --[[
- Copyright (c) 2021, Alexander Moisseev <moiseev@mezonplus.ru>
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ]]--
-
- --[[[
- -- @module cloudmark
- -- This module contains Cloudmark v2 interface
- --]]
-
- local lua_util = require "lua_util"
- local http = require "rspamd_http"
- local upstream_list = require "rspamd_upstream_list"
- local rspamd_logger = require "rspamd_logger"
- local ucl = require "ucl"
- local rspamd_util = require "rspamd_util"
- local common = require "lua_scanners/common"
- local fun = require "fun"
- local lua_mime = require "lua_mime"
-
-- Module tag: used as the default rule name, as the debug module name and as
-- the exported scanner name
local N = 'cloudmark'

-- Boundary string for multipart request bodies; generated once when this
-- module is loaded and shared by every request
local static_boundary = rspamd_util.random_hex(32)
-
--- Builds the request URL for a Cloudmark upstream address.
-- @param rule scanner rule; `use_https`, `default_port` and `url` are read
-- @param addr upstream address object (must support `get_port()` and `tostring()`)
-- @param maybe_url optional request path; `rule.url` is used when it is omitted
-- @return URL string of the form `scheme://host:port<path>`
local function cloudmark_url(rule, addr, maybe_url)
  local scheme = rule.use_https and 'https' or 'http'

  -- Fall back to the rule's default port when the address reports port 0
  local port = addr:get_port()
  if port == 0 then
    port = rule.default_port
  end

  local path = maybe_url or rule.url

  return string.format('%s://%s:%d%s', scheme, tostring(addr), port, path)
end
-
--- Preload hook: detects the maximum message size accepted by Cloudmark.
-- Sends a request to `/score/v2/max-message-size` on one upstream and, when
-- the reply is a 200 response whose parsed body has a `maxMessageSize`
-- field, stores that value in `rule.max_size`. Every failure is logged and
-- leaves `rule.max_size` untouched.
-- @param rule scanner rule (`upstreams` is read, `max_size` may be written)
-- @param cfg configuration object passed to the HTTP client and debug logger
-- @param ev_base event base passed to the HTTP client and used as log context
local function cloudmark_preload(rule, cfg, ev_base, _)
  local upstream = rule.upstreams:get_upstream_round_robin()
  local addr = upstream:get_addr()

  local function max_message_size_cb(http_err, code, body, _)
    if http_err then
      rspamd_logger.errx(ev_base, 'HTTP error when getting max message size: %s',
          http_err)
      return
    end
    if code ~= 200 then
      rspamd_logger.errx(ev_base, 'bad HTTP code when getting max message size: %s', code)
      -- The body of an error response is not a size reply: do not parse it
      return
    end

    local parser = ucl.parser()
    local ret, err = parser:parse_string(body)
    if not ret then
      rspamd_logger.errx(ev_base, 'could not parse response body [%s]: %s', body, err)
      return
    end

    local obj = parser:get_object()
    -- Guard against a valid but non-object document (e.g. a bare scalar),
    -- which cannot be indexed for the field
    local ms = type(obj) == 'table' and obj.maxMessageSize or nil
    if not ms then
      rspamd_logger.errx(ev_base, 'missing maxMessageSize in the response body (JSON): %s', obj)
      return
    end

    rule.max_size = ms
    lua_util.debugm(N, cfg, 'maximum message size set to %s bytes', ms)
  end

  http.request({
    ev_base = ev_base,
    config = cfg,
    url = cloudmark_url(rule, addr, '/score/v2/max-message-size'),
    callback = max_message_size_cb,
  })
end
-
--- Builds a Cloudmark scanner rule from user-supplied options.
-- The defaults below are overridden by `opts`; `prefix` and `log_prefix` are
-- derived when not given, `socket` is accepted as an alias for `servers`,
-- and the upstream list is created from `servers`.
-- @param opts user options; `opts.type` is read when deriving `log_prefix`
-- @return the rule table, or nil when no servers are defined or they cannot
--   be parsed into an upstream list (an error is logged in both cases)
local function cloudmark_config(opts)
  local defaults = {
    name = N,
    default_port = 2713,
    url = '/score/v2/message',
    use_https = false,
    timeout = 5.0,
    log_clean = false,
    retransmits = 1,
    score_threshold = 90, -- minimum score for the reply to be considered
    message = '${SCANNER}: spam message found: "${VIRUS}"',
    max_message = 0,
    detection_category = "hash",
    default_score = 1,
    action = false,
    log_spamcause = true,
    symbol_fail = 'CLOUDMARK_FAIL',
    symbol = 'CLOUDMARK_CHECK',
    symbol_spam = 'CLOUDMARK_SPAM',
    add_score_header = false, -- add the X-CMAE-Score header
    add_headers = false, -- allow adding the headers returned by Cloudmark
    scores_symbols = nil, -- a table with matches { [score_threshold] = symbol, ... }
  }

  local rule = lua_util.override_defaults(defaults, opts)

  rule.prefix = rule.prefix or ('rs_' .. rule.name .. '_')

  if not rule.log_prefix then
    -- Mention the type only when it differs from the name (case-insensitive)
    local same_as_type = rule.name:lower() == rule.type:lower()
    rule.log_prefix = same_as_type and rule.name
        or (rule.name .. ' (' .. rule.type .. ')')
  end

  -- `socket` is used when `servers` is not set
  rule.servers = rule.servers or rule.socket

  if not rule.servers then
    rspamd_logger.errx(rspamd_config, 'no servers defined')

    return nil
  end

  rule.upstreams = upstream_list.create(rspamd_config,
      rule.servers,
      rule.default_port)

  if not rule.upstreams then
    rspamd_logger.errx(rspamd_config, 'cannot parse servers %s',
        rule.servers)
    return nil
  end

  rule.symbols = { { symbol = rule.symbol_spam, score = 5.0 } }
  rule.preloads = { cloudmark_preload }
  lua_util.add_debug_alias('external_services', rule.name)

  return rule
end
-
--- Converts a key-value map into the fragments of a multipart/form-data body.
-- Each key is the form field name. Each value is either a plain string, which
-- is shorthand for `{ data = <string> }`, or a table with the following fields:
--   `data`: data of the part (entries without `data` are skipped)
--   `filename`: optional filename
--   `content-type`: content type of the part (optional, `text/plain` by default)
--   `content-transfer-encoding`: optional CTE header (`binary` by default)
-- Values of any other type are skipped. Parts are emitted in `pairs` order,
-- which is unspecified.
-- @param tbl map of field name -> part description
-- @param boundary multipart boundary (without the leading dashes)
-- @return array of body fragments (not concatenated); empty when no part
--   was produced
local function table_to_multipart_body(tbl, boundary)
  local out = {}

  for name, part in pairs(tbl) do
    -- Indexing a string resolves through the `string` library, so
    -- `("x").data` is nil: a bare string value used to be dropped silently
    if type(part) == 'string' then
      part = { data = part }
    end

    if type(part) == 'table' and part.data then
      out[#out + 1] = string.format('--%s\r\n', boundary)

      if part.filename then
        out[#out + 1] = string.format(
            'Content-Disposition: form-data; name="%s"; filename="%s"\r\n',
            name, part.filename)
      else
        out[#out + 1] = string.format(
            'Content-Disposition: form-data; name="%s"\r\n', name)
      end

      out[#out + 1] = string.format('Content-Type: %s\r\n',
          part['content-type'] or 'text/plain')
      out[#out + 1] = string.format('Content-Transfer-Encoding: %s\r\n',
          part['content-transfer-encoding'] or 'binary')

      -- Empty line separates the part headers from the part data
      out[#out + 1] = '\r\n'
      -- Data is appended as is (it is not required to be a Lua string)
      out[#out + 1] = part.data
      out[#out + 1] = '\r\n'
    end
  end

  -- The closing delimiter is only needed when at least one part was written
  if #out > 0 then
    out[#out + 1] = string.format('--%s--\r\n', boundary)
  end

  return out
end
-
--- Selects the symbol with the highest threshold that `score` reaches.
-- @param scores_symbols map { [threshold] = symbol, ... }; thresholds may be
--   numbers or numeric strings (keys may arrive as strings, e.g. if the table
--   comes from parsed configuration); non-numeric keys are ignored, and
--   thresholds of -1 or lower are never selected
-- @param score numeric score to match
-- @return the matching symbol, or nil when no threshold is reached
local function get_specific_symbol(scores_symbols, score)
  local selected
  local sel_thr = -1

  for threshold, sym in pairs(scores_symbols) do
    -- Normalise the key first: comparing a number with a string raises
    local thr = tonumber(threshold)
    if thr and sel_thr < thr and thr <= score then
      selected = sym
      sel_thr = thr
    end
  end

  return selected
end
-
-- Load-time self-checks for get_specific_symbol: each case gives the
-- threshold map, the score and the expected symbol (`want` is left out when
-- no symbol is expected)
do
  local single = { [90] = 'CLOUDMARK_SPAM' }
  local double = { [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }

  local cases = {
    { map = single, score = 100, want = 'CLOUDMARK_SPAM' },
    { map = single, score = 80 },
    { map = double, score = 100, want = 'CLOUDMARK_SPAM' },
    { map = double, score = 80, want = 'CLOUDMARK_SPAM2' },
    { map = double, score = 70 },
    { map = double, score = 90, want = 'CLOUDMARK_SPAM' },
    { map = { }, score = 80 },
    { map = { [100] = 'CLOUDMARK_SPAM' }, score = 100, want = 'CLOUDMARK_SPAM' },
    { map = { [0] = 'CLOUDMARK_SPAM' }, score = 0, want = 'CLOUDMARK_SPAM' },
  }

  for _, case in ipairs(cases) do
    assert(get_specific_symbol(case.map, case.score) == case.want)
  end
end
-
--- Parses a Cloudmark reply body and records the outcome on the task.
-- On an unparsable body, a non-object body or a missing `score`,
-- `rule.symbol_fail` is inserted and nothing else is done. Otherwise:
--   * `rule.symbol_spam` is inserted when the score reaches `rule.score_threshold`;
--   * headers from `appendHeaders` are added when `rule.add_headers` is set;
--   * an `X-CMAE-Score` header is added when `rule.add_score_header` is set;
--   * the symbol chosen from `rule.scores_symbols` (if any) is inserted.
-- @param task task to insert results into and to modify headers of
-- @param rule scanner rule
-- @param body raw HTTP response body
local function parse_cloudmark_reply(task, rule, body)
  local parser = ucl.parser()
  local ret, err = parser:parse_string(body)
  if not ret then
    rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
    task:insert_result(rule.symbol_fail, 1.0, 'Parser error: ' .. tostring(err))
    return
  end
  local obj = parser:get_object()
  lua_util.debugm(N, task, 'cloudmark reply is: %s', obj)

  -- A document that parses but is not an object (e.g. `null` or a bare
  -- scalar) cannot carry a score; indexing it would raise
  if type(obj) ~= 'table' or not obj.score then
    rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
    task:insert_result(rule.symbol_fail, 1.0, 'Parser error: no score')
    return
  end

  if obj.analysis then
    -- Report analysis string
    rspamd_logger.infox(task, 'cloudmark report string: %s', obj.analysis)
  end

  -- A non-numeric score is treated as 0
  local score = tonumber(obj.score) or 0
  if score >= rule.score_threshold then
    task:insert_result(rule.symbol_spam, 1.0, tostring(score))
  end

  if rule.add_headers and type(obj.appendHeaders) == 'table' then
    -- Each entry is read as { headerField = <name>, body = <value> }
    local headers_add = fun.tomap(fun.map(function(h)
      return h.headerField, {
        order = 1, value = h.body
      }
    end, obj.appendHeaders))
    lua_mime.modify_headers(task, {
      add = headers_add
    })
  end

  if rule.add_score_header then
    lua_mime.modify_headers(task, {
      add = {
        ['X-CMAE-Score'] = {
          order = 1,
          value = tostring(score)
        }
      }
    })
  end

  if type(rule.scores_symbols) == 'table' then
    local sym = get_specific_symbol(rule.scores_symbols, score)
    if sym then
      task:insert_result(sym, 1.0, tostring(score))
    end
  end
end
-
--- Scanner entry point: submits the message to Cloudmark and handles the reply.
-- The request is a multipart/form-data POST body holding the raw message
-- (`rfc822`) and, when available, `heloDomain`, `mailFrom`, `rcptTo`,
-- `connIp` and `fromHost`. On an HTTP error the upstream is marked as failed
-- and the request is retried on a newly selected upstream up to
-- `rule.retransmits` times; after that `rule.symbol_fail` is inserted.
-- @param task task being scanned
-- @param content forwarded to `common.condition_check_and_continue`
-- @param digest forwarded to `common.condition_check_and_continue`
-- @param rule scanner rule
-- @param maybe_part forwarded to `common.condition_check_and_continue`
local function cloudmark_check(task, content, digest, rule, maybe_part)
  local function cloudmark_check_uncached()
    local upstream = rule.upstreams:get_upstream_round_robin()
    local addr = upstream:get_addr()
    local retransmits = rule.retransmits

    local url = cloudmark_url(rule, addr)
    local message_data = task:get_content()
    if rule.max_message and rule.max_message > 0 and #message_data > rule.max_message then
      task:insert_result(rule['symbol_fail'], 0.0, 'Message too large: ' .. #message_data)
      return
    end

    -- Every field must be a table with a `data` member, and the optional
    -- header keys must be lower-case: that is what table_to_multipart_body
    -- reads. A `Content-Type` key or a bare string value is ignored by it.
    local request = {
      rfc822 = {
        ['content-type'] = 'message/rfc822',
        data = message_data,
      }
    }

    local helo = task:get_helo()
    if helo then
      request['heloDomain'] = {
        data = helo,
      }
    end

    local mail_from = task:get_from('smtp') or {}
    if mail_from[1] and #mail_from[1].addr > 1 then
      request['mailFrom'] = {
        data = mail_from[1].addr
      }
    end

    local rcpt_to = task:get_recipients('smtp')
    if rcpt_to then
      request['rcptTo'] = {
        data = table.concat(fun.totable(fun.map(function(r)
          return r.addr
        end, rcpt_to)), ',')
      }
    end

    local fip = task:get_from_ip()
    if fip and fip:is_valid() then
      request['connIp'] = {
        data = tostring(fip),
      }
    end

    local hostname = task:get_hostname()
    if hostname then
      request['fromHost'] = {
        data = hostname,
      }
    end

    local request_data = {
      task = task,
      url = url,
      body = table_to_multipart_body(request, static_boundary),
      headers = {
        ['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary)
      },
      timeout = rule.timeout,
    }

    local function cloudmark_callback(http_err, code, body, headers)

      local function cloudmark_requery()
        -- set current upstream to fail because an error occurred
        -- (done exactly once per failed request)
        upstream:fail()

        -- retry with another upstream until retransmits are exhausted
        if retransmits > 0 then

          retransmits = retransmits - 1

          lua_util.debugm(rule.name, task,
              '%s: request Error: %s - retries left: %s',
              rule.log_prefix, http_err, retransmits)

          -- Select a different upstream!
          upstream = rule.upstreams:get_upstream_round_robin()
          addr = upstream:get_addr()
          url = cloudmark_url(rule, addr)

          lua_util.debugm(rule.name, task, '%s: retry IP: %s:%s',
              rule.log_prefix, addr, addr:get_port())
          request_data.url = url

          -- Same request (and same callback), only the URL has changed
          http.request(request_data)
        else
          rspamd_logger.errx(task, '%s: failed to scan, maximum retransmits ' ..
              'exceed', rule.log_prefix)
          task:insert_result(rule['symbol_fail'], 0.0, 'failed to scan and ' ..
              'retransmits exceed')
        end
      end

      if http_err then
        cloudmark_requery()
      else
        -- Parse the response
        if upstream then
          upstream:ok()
        end
        if code ~= 200 then
          rspamd_logger.errx(task, 'invalid HTTP code: %s, body: %s, headers: %s', code, body, headers)
          task:insert_result(rule.symbol_fail, 1.0, 'Bad HTTP code: ' .. code)
          return
        end
        parse_cloudmark_reply(task, rule, body)
      end
    end

    request_data.callback = cloudmark_callback
    http.request(request_data)
  end

  -- When the common helper returns a true value there is nothing left to do
  -- here; otherwise run the scan right away
  if not common.condition_check_and_continue(task, content, rule, digest,
      cloudmark_check_uncached, maybe_part) then
    cloudmark_check_uncached()
  end
end
-
-- Module export: scanner descriptor with its type tags, description,
-- configuration function, check function and name
local exports = {
  name = N,
  description = 'Cloudmark cartridge interface',
  type = { 'cloudmark', 'scanner' },
  configure = cloudmark_config,
  check = cloudmark_check,
}

return exports
|