You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

cloudmark.lua 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. --[[
  2. Copyright (c) 2021, Alexander Moisseev <moiseev@mezonplus.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[[
  14. -- @module cloudmark
  15. -- This module contains Cloudmark v2 interface
  16. --]]
  17. local lua_util = require "lua_util"
  18. local http = require "rspamd_http"
  19. local upstream_list = require "rspamd_upstream_list"
  20. local rspamd_logger = require "rspamd_logger"
  21. local ucl = require "ucl"
  22. local rspamd_util = require "rspamd_util"
  23. local common = require "lua_scanners/common"
  24. local fun = require "fun"
  25. local lua_mime = require "lua_mime"
  -- Scanner name: used as the default rule name and as the debug/log module tag
  local N = 'cloudmark'

  -- Multipart boundary reused by every request; a random hex string produced
  -- once, when this module is loaded
  local boundary_hex_len = 32
  local static_boundary = rspamd_util.random_hex(boundary_hex_len)
  --- Builds the request URL for a Cloudmark upstream address.
  -- @param rule scanner rule; `url`, `default_port` and `use_https` are read
  -- @param addr upstream address; must support `get_port()` and `tostring()`
  -- @param maybe_url optional path that replaces `rule.url`
  -- @return string of the form `scheme://address:port/path`
  local function cloudmark_url(rule, addr, maybe_url)
    local path = maybe_url or rule.url
    local fmt = rule.use_https and 'https://%s:%d%s' or 'http://%s:%d%s'

    -- A zero port on the address falls back to the rule's default port
    local port = addr:get_port()
    if port == 0 then
      port = rule.default_port
    end

    return string.format(fmt, tostring(addr), port, path)
  end
  --- Preload hook: asks a Cloudmark server for its maximum message size and
  -- stores the reported value in `rule.max_size`.
  -- Failures are logged and leave `rule.max_size` untouched.
  -- @param rule scanner rule; `rule.upstreams` is used to pick a server
  -- @param cfg config object handed to the HTTP client and the debug logger
  -- @param ev_base event base used for the request and as the logging context
  local function cloudmark_preload(rule, cfg, ev_base, _)
    local upstream = rule.upstreams:get_upstream_round_robin()
    if not upstream then
      rspamd_logger.errx(ev_base, 'no upstream available to get max message size')
      return
    end
    local addr = upstream:get_addr()

    local function max_message_size_cb(http_err, code, body, _)
      if http_err then
        rspamd_logger.errx(ev_base, 'HTTP error when getting max message size: %s',
            http_err)
        return
      end
      if code ~= 200 then
        rspamd_logger.errx(ev_base, 'bad HTTP code when getting max message size: %s', code)
        -- An error reply does not carry the expected JSON; do not try to parse it
        return
      end

      local parser = ucl.parser()
      local ret, err = parser:parse_string(body)
      if not ret then
        rspamd_logger.errx(ev_base, 'could not parse response body [%s]: %s', body, err)
        return
      end

      local obj = parser:get_object()
      -- A valid document may still be a scalar; indexing it below would raise
      if type(obj) ~= 'table' then
        rspamd_logger.errx(ev_base, 'missing maxMessageSize in the response body (JSON): %s', obj)
        return
      end

      local ms = obj.maxMessageSize
      if not ms then
        rspamd_logger.errx(ev_base, 'missing maxMessageSize in the response body (JSON): %s', obj)
        return
      end

      rule.max_size = ms
      lua_util.debugm(N, cfg, 'set maximum message size set to %s bytes', ms)
    end

    http.request({
      ev_base = ev_base,
      config = cfg,
      url = cloudmark_url(rule, addr, '/score/v2/max-message-size'),
      callback = max_message_size_cb,
    })
  end
  --- Converts numeric-looking string keys of a table into real numbers, in place.
  -- Keys that are already numbers, and keys that do not parse as numbers, are
  -- left untouched.
  -- @param d table to modify
  local function numerify(d)
    -- Snapshot the keys first: new keys must not be added during a pairs() walk
    local keys = {}
    for k in pairs(d) do
      keys[#keys + 1] = k
    end

    for _, k in ipairs(keys) do
      -- Only strings are converted. Re-keying an existing number onto itself and
      -- then clearing the old key would delete the entry.
      if type(k) == 'string' then
        local as_number = tonumber(k)
        -- `as_number == as_number` rejects NaN, which is not a valid table key
        if as_number and as_number == as_number then
          d[as_number] = d[k]
          d[k] = nil
        end
      end
    end
  end
  --- Builds the scanner rule by merging user options over the defaults.
  -- @param opts user-supplied options table
  -- @return the completed rule table, or nil when no servers are configured or
  --         the server list cannot be turned into an upstream list
  local function cloudmark_config(opts)
    local defaults = {
      name = N,
      default_port = 2713,
      url = '/score/v2/message',
      use_https = false,
      timeout = 5.0,
      log_clean = false,
      retransmits = 1,
      score_threshold = 90, -- minimum score for the reply to be treated as spam
      message = '${SCANNER}: spam message found: "${VIRUS}"',
      max_message = 0,
      detection_category = "hash",
      default_score = 1,
      action = false,
      log_spamcause = true,
      symbol_fail = 'CLOUDMARK_FAIL',
      symbol = 'CLOUDMARK_CHECK',
      symbol_spam = 'CLOUDMARK_SPAM',
      add_score_header = false, -- add the X-CMAE-Score header
      add_headers = false, -- allow headers supplied by Cloudmark to be added
      scores_symbols = nil, -- optional map { [score_threshold] = symbol, ... }
    }
    local conf = lua_util.override_defaults(defaults, opts)

    -- Thresholds must be numeric keys for the later comparisons
    if type(conf.scores_symbols) == 'table' then
      numerify(conf.scores_symbols)
    end

    conf.prefix = conf.prefix or ('rs_' .. conf.name .. '_')

    if not conf.log_prefix then
      local same_name = conf.name:lower() == conf.type:lower()
      if same_name then
        conf.log_prefix = conf.name
      else
        conf.log_prefix = conf.name .. ' (' .. conf.type .. ')'
      end
    end

    -- `socket` is accepted as a fallback spelling of `servers`
    if conf.socket and not conf.servers then
      conf.servers = conf.socket
    end

    if not conf.servers then
      rspamd_logger.errx(rspamd_config, 'no servers defined')
      return nil
    end

    conf.upstreams = upstream_list.create(rspamd_config,
        conf.servers,
        conf.default_port)

    if not conf.upstreams then
      rspamd_logger.errx(rspamd_config, 'cannot parse servers %s',
          conf['servers'])
      return nil
    end

    conf.symbols = { { symbol = conf.symbol_spam, score = 5.0 } }
    conf.preloads = { cloudmark_preload }
    lua_util.add_debug_alias('external_services', conf.name)
    return conf
  end
  --- Converts a key-value map into the chunks of a multipart/form-data body.
  -- Keys are the form field names. Each value is either a plain string, used as
  -- the part data with default headers, or a table with the following fields:
  -- `data`: data of the part (entries without it are skipped)
  -- `filename`: optional filename
  -- `content-type`: content type of the element (optional, text/plain if absent)
  -- `content-transfer-encoding`: optional CTE header (binary if absent)
  -- Values of any other type are skipped.
  -- @param tbl map of field name to part description
  -- @param boundary multipart boundary string (without the leading dashes)
  -- @return array of body chunks in order; empty when no part carries data
  local function table_to_multipart_body(tbl, boundary)
    local seen_data = false
    local out = {}

    for name, part in pairs(tbl) do
      -- Indexing a bare string yields nil for `.data`, so such values used to be
      -- dropped silently; treat them as the part payload instead
      if type(part) == 'string' then
        part = { data = part }
      end

      if type(part) == 'table' and part.data then
        seen_data = true
        table.insert(out, string.format('--%s\r\n', boundary))

        if part.filename then
          table.insert(out,
              string.format('Content-Disposition: form-data; name="%s"; filename="%s"\r\n',
                  name, part.filename))
        else
          table.insert(out,
              string.format('Content-Disposition: form-data; name="%s"\r\n', name))
        end

        table.insert(out,
            string.format('Content-Type: %s\r\n', part['content-type'] or 'text/plain'))
        table.insert(out,
            string.format('Content-Transfer-Encoding: %s\r\n',
                part['content-transfer-encoding'] or 'binary'))

        -- Blank line separates the part headers from its payload
        table.insert(out, '\r\n')
        table.insert(out, part.data)
        table.insert(out, '\r\n')
      end
    end

    -- The closing delimiter is only emitted when at least one part was written
    if seen_data then
      table.insert(out, string.format('--%s--\r\n', boundary))
    end

    return out
  end
  --- Picks the symbol whose threshold is the highest one not exceeding `score`.
  -- Only numeric, non-negative thresholds are considered; entries with any other
  -- kind of key are ignored instead of raising on the comparison.
  -- @param scores_symbols map { [threshold] = symbol, ... }
  -- @param score numeric score to classify
  -- @return the selected symbol, or nil when no threshold is reached
  local function get_specific_symbol(scores_symbols, score)
    local selected
    local sel_thr = -1

    for threshold, sym in pairs(scores_symbols) do
      if type(threshold) == 'number' and sel_thr < threshold and threshold <= score then
        selected = sym
        sel_thr = threshold
      end
    end

    return selected
  end

  -- Load-time self checks for the threshold selection
  assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM' }, 100) == 'CLOUDMARK_SPAM')
  assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM' }, 80) == nil)
  assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }, 100) == 'CLOUDMARK_SPAM')
  assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }, 80) == 'CLOUDMARK_SPAM2')
  assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }, 70) == nil)
  assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }, 90) == 'CLOUDMARK_SPAM')
  assert(get_specific_symbol({ }, 80) == nil)
  assert(get_specific_symbol({ [100] = 'CLOUDMARK_SPAM' }, 100) == 'CLOUDMARK_SPAM')
  assert(get_specific_symbol({ [0] = 'CLOUDMARK_SPAM' }, 0) == 'CLOUDMARK_SPAM')
  assert(get_specific_symbol({ [90] = 'CLOUDMARK_SPAM', bogus = 'IGNORED' }, 95) == 'CLOUDMARK_SPAM')
  --- Parses a Cloudmark reply body and records the outcome on the task.
  -- Inserts `rule.symbol_fail` when the body cannot be parsed or carries no
  -- score, `rule.symbol_spam` when the score reaches `rule.score_threshold`,
  -- and the best matching symbol from `rule.scores_symbols` when that table is
  -- configured. Depending on `rule.add_headers` / `rule.add_score_header`,
  -- headers from the reply and an `X-CMAE-Score` header are added as well.
  -- @param task task to report on
  -- @param rule scanner rule
  -- @param body raw reply body
  local function parse_cloudmark_reply(task, rule, body)
    local parser = ucl.parser()
    local ret, err = parser:parse_string(body)
    if not ret then
      rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
      -- tostring() keeps the concatenation safe should no error text be returned
      task:insert_result(rule.symbol_fail, 1.0, 'Parser error: ' .. tostring(err))
      return
    end

    local obj = parser:get_object()
    lua_util.debugm(N, task, 'cloudmark reply is: %s', obj)

    -- A well-formed reply may still be a scalar; indexing that would raise
    if type(obj) ~= 'table' or not obj.score then
      rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
      task:insert_result(rule.symbol_fail, 1.0, 'Parser error: no score')
      return
    end

    if obj.analysis then
      -- Report analysis string
      rspamd_logger.infox(task, 'cloudmark report string: %s', obj.analysis)
    end

    local score = tonumber(obj.score) or 0
    if score >= rule.score_threshold then
      task:insert_result(rule.symbol_spam, 1.0, tostring(score))
    end

    if rule.add_headers and type(obj.appendHeaders) == 'table' then
      local headers_add = {}
      for _, h in ipairs(obj.appendHeaders) do
        -- Entries without a name or a value cannot become a header; skip them
        -- rather than failing on a nil table key
        if type(h) == 'table' and h.headerField and h.body then
          headers_add[h.headerField] = {
            order = 1, value = h.body
          }
        end
      end
      lua_mime.modify_headers(task, {
        add = headers_add
      })
    end

    if rule.add_score_header then
      lua_mime.modify_headers(task, {
        add = {
          ['X-CMAE-Score'] = {
            order = 1,
            value = tostring(score)
          }
        }
      })
    end

    if type(rule.scores_symbols) == 'table' then
      local sym = get_specific_symbol(rule.scores_symbols, score)
      if sym then
        task:insert_result(sym, 1.0, tostring(score))
      end
    end
  end
  --- Scans a message with Cloudmark.
  -- `common.condition_check_and_continue` is consulted first; when it returns a
  -- truthy value nothing more is done here, otherwise the request is started
  -- directly.
  -- @param task task being scanned
  -- @param content passed through to `common.condition_check_and_continue`
  -- @param digest passed through to `common.condition_check_and_continue`
  -- @param rule scanner rule built by `cloudmark_config`
  -- @param maybe_part passed through to `common.condition_check_and_continue`
  local function cloudmark_check(task, content, digest, rule, maybe_part)
    local function cloudmark_check_uncached()
      local upstream = rule.upstreams:get_upstream_round_robin()
      local addr = upstream:get_addr()
      local retransmits = rule.retransmits
      local url = cloudmark_url(rule, addr)

      local message_data = task:get_content()
      if rule.max_message and rule.max_message > 0 and #message_data > rule.max_message then
        task:insert_result(rule['symbol_fail'], 0.0, 'Message too large: ' .. #message_data)
        return
      end

      -- Every field is a table with a `data` member, and header overrides use
      -- the lower-case keys read by table_to_multipart_body; anything else is
      -- not picked up by the multipart builder
      local request = {
        rfc822 = {
          ['content-type'] = 'message/rfc822',
          data = message_data,
        }
      }

      local helo = task:get_helo()
      if helo then
        request['heloDomain'] = {
          data = helo,
        }
      end

      local mail_from = task:get_from('smtp') or {}
      if mail_from[1] and #mail_from[1].addr > 1 then
        request['mailFrom'] = {
          data = mail_from[1].addr
        }
      end

      local rcpt_to = task:get_recipients('smtp')
      if rcpt_to then
        request['rcptTo'] = {
          data = table.concat(fun.totable(fun.map(function(r)
            return r.addr
          end, rcpt_to)), ',')
        }
      end

      local fip = task:get_from_ip()
      if fip and fip:is_valid() then
        request['connIp'] = {
          data = tostring(fip),
        }
      end

      local hostname = task:get_hostname()
      if hostname then
        request['fromHost'] = {
          data = hostname,
        }
      end

      local request_data = {
        task = task,
        url = url,
        body = table_to_multipart_body(request, static_boundary),
        headers = {
          ['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary)
        },
        timeout = rule.timeout,
      }

      local function cloudmark_callback(http_err, code, body, headers)

        local function cloudmark_requery()
          -- set current upstream to fail because an error occurred
          upstream:fail()

          -- retry with another upstream until retransmits exceeds
          if retransmits > 0 then
            retransmits = retransmits - 1

            lua_util.debugm(rule.name, task,
                '%s: request Error: %s - retries left: %s',
                rule.log_prefix, http_err, retransmits)

            -- Select a different upstream!
            upstream = rule.upstreams:get_upstream_round_robin()
            addr = upstream:get_addr()
            url = cloudmark_url(rule, addr)

            lua_util.debugm(rule.name, task, '%s: retry IP: %s:%s',
                rule.log_prefix, addr, addr:get_port())
            request_data.url = url

            http.request(request_data)
          else
            -- The upstream was already marked as failed above; do not count
            -- the same error twice
            rspamd_logger.errx(task, '%s: failed to scan, maximum retransmits ' ..
                'exceed', rule.log_prefix)
            task:insert_result(rule['symbol_fail'], 0.0, 'failed to scan and ' ..
                'retransmits exceed')
          end
        end

        if http_err then
          cloudmark_requery()
        else
          -- Parse the response
          if upstream then
            upstream:ok()
          end
          if code ~= 200 then
            rspamd_logger.errx(task, 'invalid HTTP code: %s, body: %s, headers: %s', code, body, headers)
            task:insert_result(rule.symbol_fail, 1.0, 'Bad HTTP code: ' .. code)
            return
          end
          parse_cloudmark_reply(task, rule, body)
        end
      end

      request_data.callback = cloudmark_callback
      http.request(request_data)
    end

    if common.condition_check_and_continue(task, content, rule, digest,
        cloudmark_check_uncached, maybe_part) then
      return
    else
      cloudmark_check_uncached()
    end
  end
  -- Scanner descriptor returned to whoever requires this module: identification
  -- fields plus the configuration and check entry points
  local scanner = {
    name = N,
    description = 'Cloudmark cartridge interface',
    type = { 'cloudmark', 'scanner' },
    configure = cloudmark_config,
    check = cloudmark_check,
  }

  return scanner