aboutsummaryrefslogtreecommitdiffstats
path: root/lualib/lua_scanners/cloudmark.lua
blob: ccb45b0471649a5e1c7299efd15006fa67f09e6b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
--[[
Copyright (c) 2021, Alexander Moisseev <moiseev@mezonplus.ru>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--

--[[[
-- @module cloudmark
-- This module contains the Cloudmark v2 interface
--]]

local lua_util = require "lua_util"
local http = require "rspamd_http"
local upstream_list = require "rspamd_upstream_list"
local rspamd_logger = require "rspamd_logger"
local ucl = require "ucl"
local rspamd_util = require "rspamd_util"
local common = require "lua_scanners/common"
local fun = require "fun"
local lua_mime = require "lua_mime"

-- Scanner name: used as the debug module name and as the default rule name
local N = 'cloudmark'
-- Boundary for multipart transfers, generated on module init
-- (a single random hex string shared by every request this module builds)
local static_boundary = rspamd_util.random_hex(32)

--- Builds the full request URL for a Cloudmark upstream address.
-- @param rule scanner rule; `url`, `default_port` and `use_https` are read
-- @param addr upstream address; must provide `get_port()` and be convertible with `tostring`
-- @param maybe_url optional request path that overrides `rule.url`
-- @return URL string of the form `scheme://addr:port<path>`
local function cloudmark_url(rule, addr, maybe_url)
  local path = maybe_url or rule.url
  local scheme = rule.use_https and 'https' or 'http'

  local port = addr:get_port()
  if port == 0 then
    -- The address carries no explicit port: fall back to the configured default
    port = rule.default_port
  end

  return string.format('%s://%s:%d%s', scheme, tostring(addr), port, path)
end

--- Preload hook: asks a Cloudmark upstream for the maximum message size it
-- accepts and stores the answer in `rule.max_size`.
-- The request is asynchronous; on any failure an error is logged and
-- `rule.max_size` is left untouched.
-- @param rule scanner rule; `rule.upstreams` is read, `rule.max_size` is written on success
-- @param cfg config object handed to the HTTP client and the debug logger
-- @param ev_base event base handed to the HTTP client; also used as the log context
local function cloudmark_preload(rule, cfg, ev_base, _)
  local upstream = rule.upstreams:get_upstream_round_robin()
  local addr = upstream:get_addr()

  -- HTTP completion callback for the max-message-size query
  local function max_message_size_cb(http_err, code, body, _)
    if http_err then
      rspamd_logger.errx(ev_base, 'HTTP error when getting max message size: %s',
          http_err)
      return
    end
    if code ~= 200 then
      rspamd_logger.errx(ev_base, 'bad HTTP code when getting max message size: %s', code)
      -- Do not try to interpret the body of an error reply (same policy as
      -- the scan callback, which also stops on a non-200 code)
      return
    end
    local parser = ucl.parser()
    local ret, err = parser:parse_string(body)
    if not ret then
      rspamd_logger.errx(ev_base, 'could not parse response body [%s]: %s', body, err)
      return
    end
    local obj = parser:get_object()
    -- Only index the parsed value when it really is a table
    local ms
    if type(obj) == 'table' then
      ms = obj.maxMessageSize
    end
    if not ms then
      rspamd_logger.errx(ev_base, 'missing maxMessageSize in the response body (JSON): %s', obj)
      return
    end

    rule.max_size = ms
    lua_util.debugm(N, cfg, 'set maximum message size set to %s bytes', ms)
  end

  http.request({
    ev_base = ev_base,
    config = cfg,
    url = cloudmark_url(rule, addr, '/score/v2/max-message-size'),
    callback = max_message_size_cb,
  })
end

--- Converts, in place, every key that `tonumber` can parse into a numeric key.
-- Keys that are already numbers and keys that cannot be converted are left
-- untouched.
-- @param d table to normalise (modified in place)
local function numerify(d)
  -- Snapshot the keys first: new keys must not be added while traversing
  -- the table with pairs()
  local keys = {}
  for k in pairs(d) do
    keys[#keys + 1] = k
  end

  for _, k in ipairs(keys) do
    local new_key = tonumber(k)
    -- A key that is already numeric converts to itself; re-assigning it and
    -- then clearing the "old" key would delete the entry, so skip it
    if new_key ~= nil and new_key ~= k then
      d[new_key] = d[k]
      d[k] = nil
    end
  end
end

--- Builds the scanner rule from the user options merged over the built-in
-- defaults, and creates the upstream list for it.
-- @param opts user supplied options table
-- @return the populated rule table, or nil when no servers are configured or
-- the upstream list cannot be created from them
local function cloudmark_config(opts)
  local defaults = {
    name = N,
    default_port = 2713,
    url = '/score/v2/message',
    use_https = false,
    timeout = 5.0,
    log_clean = false,
    retransmits = 1,
    score_threshold = 90, -- minimum score for the reply to yield the spam symbol
    message = '${SCANNER}: spam message found: "${VIRUS}"',
    max_message = 0,
    detection_category = "hash",
    default_score = 1,
    action = false,
    log_spamcause = true,
    symbol_fail = 'CLOUDMARK_FAIL',
    symbol = 'CLOUDMARK_CHECK',
    symbol_spam = 'CLOUDMARK_SPAM',
    add_score_header = false, -- Add X-CMAE-Score header
    add_headers = false, -- allow addition of the headers from Cloudmark
    scores_symbols = nil, -- a table with match { [score_threshold] = symbol, ... }
  }

  local conf = lua_util.override_defaults(defaults, opts)

  -- Thresholds may arrive as string keys; turn them into numbers
  if type(conf.scores_symbols) == 'table' then
    numerify(conf.scores_symbols)
  end

  conf.prefix = conf.prefix or ('rs_' .. conf.name .. '_')

  if not conf.log_prefix then
    local same_as_type = conf.name:lower() == conf.type:lower()
    conf.log_prefix = same_as_type and conf.name
        or (conf.name .. ' (' .. conf.type .. ')')
  end

  -- `socket` is accepted as a fallback for `servers`
  if not conf.servers and conf.socket then
    conf.servers = conf.socket
  end

  if not conf.servers then
    rspamd_logger.errx(rspamd_config, 'no servers defined')

    return nil
  end

  conf.upstreams = upstream_list.create(rspamd_config,
      conf.servers,
      conf.default_port)

  if not conf.upstreams then
    rspamd_logger.errx(rspamd_config, 'cannot parse servers %s',
        conf['servers'])
    return nil
  end

  conf.symbols = { { symbol = conf.symbol_spam, score = 5.0 } }
  conf.preloads = { cloudmark_preload }
  lua_util.add_debug_alias('external_services', conf.name)
  return conf
end

--- Serialises a map of form fields into the chunks of a multipart/form-data body.
-- Every key of `tbl` is a field name, every value describes one part:
-- `data`: data of the part (entries without `data` are skipped)
-- `filename`: optional filename
-- `content-type`: content type of the element (optional, `text/plain` if absent)
-- `content-transfer-encoding`: optional CTE header (`binary` if absent)
-- @param tbl map of field name -> part description
-- @param boundary multipart boundary, without the leading dashes
-- @return array of body chunks in output order; empty when no part carries data
local function table_to_multipart_body(tbl, boundary)
  local chunks = {}

  local function emit(chunk)
    chunks[#chunks + 1] = chunk
  end

  for name, part in pairs(tbl) do
    if part.data then
      emit(string.format('--%s\r\n', boundary))

      local disposition
      if part.filename then
        disposition = string.format(
            'Content-Disposition: form-data; name="%s"; filename="%s"\r\n',
            name, part.filename)
      else
        disposition = string.format(
            'Content-Disposition: form-data; name="%s"\r\n', name)
      end
      emit(disposition)

      emit(string.format('Content-Type: %s\r\n',
          part['content-type'] or 'text/plain'))
      emit(string.format('Content-Transfer-Encoding: %s\r\n',
          part['content-transfer-encoding'] or 'binary'))

      -- Blank line separates the part headers from the payload
      emit('\r\n')
      emit(part.data)
      emit('\r\n')
    end
  end

  -- The closing delimiter is only written when at least one part was emitted
  if #chunks > 0 then
    emit(string.format('--%s--\r\n', boundary))
  end

  return chunks
end

--- Selects the symbol whose threshold is the highest one not exceeding `score`.
-- @param scores_symbols map { [threshold] = symbol, ... }; only numeric,
-- non-negative thresholds can be selected
-- @param score numeric score to classify
-- @return the matching symbol, or nil when no threshold is <= `score`
local function get_specific_symbol(scores_symbols, score)
  local selected
  local sel_thr = -1

  for threshold, sym in pairs(scores_symbols) do
    -- A non-numeric key cannot be ordered against the score: comparing it
    -- would raise an error, so such entries are ignored
    if type(threshold) == 'number'
        and sel_thr < threshold and threshold <= score then
      selected = sym
      sel_thr = threshold
    end
  end

  return selected
end

-- Load-time self-checks for get_specific_symbol: every case lists the
-- threshold map, the score and the expected symbol (absent `want` means nil)
do
  local cases = {
    { map = { [90] = 'CLOUDMARK_SPAM' }, score = 100, want = 'CLOUDMARK_SPAM' },
    { map = { [90] = 'CLOUDMARK_SPAM' }, score = 80 },
    { map = { [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }, score = 100, want = 'CLOUDMARK_SPAM' },
    { map = { [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }, score = 80, want = 'CLOUDMARK_SPAM2' },
    { map = { [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }, score = 70 },
    { map = { [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }, score = 90, want = 'CLOUDMARK_SPAM' },
    { map = { }, score = 80 },
    { map = { [100] = 'CLOUDMARK_SPAM' }, score = 100, want = 'CLOUDMARK_SPAM' },
    { map = { [0] = 'CLOUDMARK_SPAM' }, score = 0, want = 'CLOUDMARK_SPAM' },
  }

  for _, case in ipairs(cases) do
    assert(get_specific_symbol(case.map, case.score) == case.want)
  end
end

--- Parses the body of a Cloudmark scan reply and applies it to the task.
-- On a parse failure, or when the reply carries no `score`, `rule.symbol_fail`
-- is inserted and nothing else happens. Otherwise:
--  * `rule.symbol_spam` is inserted when the score reaches `rule.score_threshold`;
--  * headers listed in the reply's `appendHeaders` are added if `rule.add_headers` is set;
--  * an `X-CMAE-Score` header is added if `rule.add_score_header` is set;
--  * the symbol selected from `rule.scores_symbols` (if any) is inserted.
-- @param task task receiving symbols and header modifications
-- @param rule scanner rule
-- @param body raw HTTP response body
local function parse_cloudmark_reply(task, rule, body)
  local parser = ucl.parser()
  local ret, err = parser:parse_string(body)
  if not ret then
    rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
    task:insert_result(rule.symbol_fail, 1.0, 'Parser error: ' .. tostring(err))
    return
  end
  local obj = parser:get_object()
  lua_util.debugm(N, task, 'cloudmark reply is: %s', obj)

  -- Guard against a top-level value that is not a table before indexing it
  if type(obj) ~= 'table' or not obj.score then
    rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
    task:insert_result(rule.symbol_fail, 1.0, 'Parser error: no score')
    return
  end

  if obj.analysis then
    -- Report analysis string
    rspamd_logger.infox(task, 'cloudmark report string: %s', obj.analysis)
  end

  -- A score that is not numeric is treated as 0
  local score = tonumber(obj.score) or 0
  if score >= rule.score_threshold then
    task:insert_result(rule.symbol_spam, 1.0, tostring(score))
  end

  if rule.add_headers and type(obj.appendHeaders) == 'table' then
    local headers_add = {}
    for _, h in ipairs(obj.appendHeaders) do
      -- Skip malformed entries: a missing header name cannot be used as a key
      if type(h) == 'table' and h.headerField then
        headers_add[h.headerField] = {
          order = 1, value = h.body
        }
      end
    end
    lua_mime.modify_headers(task, {
      add = headers_add
    })
  end

  if rule.add_score_header then
    lua_mime.modify_headers(task, {
      add = {
        ['X-CMAE-Score'] = {
          order = 1,
          value = tostring(score)
        }
      }
    })
  end

  if type(rule.scores_symbols) == 'table' then
    local sym = get_specific_symbol(rule.scores_symbols, score)
    if sym then
      task:insert_result(sym, 1.0, tostring(score))
    end
  end

end

--- Scans the message of `task` with Cloudmark and records the outcome on the task.
-- `common.condition_check_and_continue` is consulted first (it receives the
-- uncached scan function); when it returns true nothing more is done here,
-- otherwise the uncached scan is started directly.
-- The uncached scan posts the whole message (`task:get_content()`) together
-- with the available envelope data as multipart/form-data. HTTP transport
-- errors are retried on another upstream up to `rule.retransmits` times.
-- @param task task being scanned
-- @param content forwarded to `common.condition_check_and_continue`
-- @param digest forwarded to `common.condition_check_and_continue`
-- @param rule scanner rule
-- @param maybe_part forwarded to `common.condition_check_and_continue`
local function cloudmark_check(task, content, digest, rule, maybe_part)
  local function cloudmark_check_uncached()
    local upstream = rule.upstreams:get_upstream_round_robin()
    local addr = upstream:get_addr()
    local retransmits = rule.retransmits

    local url = cloudmark_url(rule, addr)
    local message_data = task:get_content()
    if rule.max_message and rule.max_message > 0 and #message_data > rule.max_message then
      task:insert_result(rule['symbol_fail'], 0.0, 'Message too large: ' .. #message_data)
      return
    end
    local request = {
      rfc822 = {
        -- table_to_multipart_body reads the lower-case `content-type` key;
        -- any other spelling is ignored and the part goes out as text/plain
        ['content-type'] = 'message/rfc822',
        data = message_data,
      }
    }

    local helo = task:get_helo()
    if helo then
      request['heloDomain'] = {
        data = helo,
      }
    end
    local mail_from = task:get_from('smtp') or {}
    if mail_from[1] and #mail_from[1].addr > 1 then
      request['mailFrom'] = {
        data = mail_from[1].addr
      }
    end

    local rcpt_to = task:get_recipients('smtp')
    if rcpt_to then
      request['rcptTo'] = {
        data = table.concat(fun.totable(fun.map(function(r)
          return r.addr
        end, rcpt_to)), ',')
      }
    end

    local fip = task:get_from_ip()
    if fip and fip:is_valid() then
      request['connIp'] = {
        data = tostring(fip)
      }
    end

    local hostname = task:get_hostname()
    if hostname then
      -- Every part must be a table with a `data` field: a bare string has no
      -- `data` and would be skipped when the body is built
      request['fromHost'] = {
        data = hostname
      }
    end

    local request_data = {
      task = task,
      url = url,
      body = table_to_multipart_body(request, static_boundary),
      headers = {
        ['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary)
      },
      timeout = rule.timeout,
    }

    local function cloudmark_callback(http_err, code, body, headers)

      local function cloudmark_requery()
        -- set current upstream to fail because an error occurred
        upstream:fail()

        -- retry with another upstream until retransmits exceeds
        if retransmits > 0 then

          retransmits = retransmits - 1

          lua_util.debugm(rule.name, task,
              '%s: request Error: %s - retries left: %s',
              rule.log_prefix, http_err, retransmits)

          -- Select a different upstream!
          upstream = rule.upstreams:get_upstream_round_robin()
          addr = upstream:get_addr()
          url = cloudmark_url(rule, addr)

          lua_util.debugm(rule.name, task, '%s: retry IP: %s:%s',
              rule.log_prefix, addr, addr:get_port())
          request_data.url = url

          http.request(request_data)
        else
          -- The upstream has already been marked as failed above; marking it
          -- again would count a single error twice
          rspamd_logger.errx(task, '%s: failed to scan, maximum retransmits ' ..
              'exceed', rule.log_prefix)
          task:insert_result(rule['symbol_fail'], 0.0, 'failed to scan and ' ..
              'retransmits exceed')
        end
      end

      if http_err then
        cloudmark_requery()
      else
        -- Parse the response
        if upstream then
          upstream:ok()
        end
        if code ~= 200 then
          rspamd_logger.errx(task, 'invalid HTTP code: %s, body: %s, headers: %s', code, body, headers)
          task:insert_result(rule.symbol_fail, 1.0, 'Bad HTTP code: ' .. code)
          return
        end
        parse_cloudmark_reply(task, rule, body)
      end
    end

    request_data.callback = cloudmark_callback
    http.request(request_data)
  end

  if common.condition_check_and_continue(task, content, rule, digest,
      cloudmark_check_uncached, maybe_part) then
    return
  else
    cloudmark_check_uncached()
  end
end

-- Module export: scanner descriptor with its configure and check entry points
local exports = {
  name = N,
  type = { 'cloudmark', 'scanner' },
  description = 'Cloudmark cartridge interface',
  configure = cloudmark_config,
  check = cloudmark_check,
}

return exports