You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

cloudmark.lua 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. --[[
  2. Copyright (c) 2021, Alexander Moisseev <moiseev@mezonplus.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[[
  14. -- @module cloudmark
  15. -- This module contains Cloudmark v2 interface
  16. --]]
  17. local lua_util = require "lua_util"
  18. local http = require "rspamd_http"
  19. local upstream_list = require "rspamd_upstream_list"
  20. local rspamd_logger = require "rspamd_logger"
  21. local ucl = require "ucl"
  22. local rspamd_util = require "rspamd_util"
  23. local common = require "lua_scanners/common"
  24. local fun = require "fun"
  25. local lua_mime = require "lua_mime"
  26. local N = 'cloudmark'
  27. -- Boundary for multipart transfers, generated on module init
  28. local static_boundary = rspamd_util.random_hex(32)
  --- Build the request URL for a Cloudmark upstream address.
  -- @param rule scanner rule; `url`, `default_port` and `use_https` are read
  -- @param addr upstream address; must provide `get_port()` and support `tostring`
  -- @param maybe_url optional path that replaces `rule.url`
  -- @return string of the form `scheme://addr:port/path`
  local function cloudmark_url(rule, addr, maybe_url)
    local path = maybe_url or rule.url
    local port = addr:get_port()
    if port == 0 then
      -- A zero port is replaced by the rule's default port
      port = rule.default_port
    end
    local scheme = rule.use_https and 'https' or 'http'
    return string.format('%s://%s:%d%s', scheme, tostring(addr), port, path)
  end
  --- Detect the maximum message size accepted by Cloudmark.
  -- Requests `/score/v2/max-message-size` from one upstream and, when the
  -- reply is a 200 response whose JSON object has `maxMessageSize`, stores
  -- that value in `rule.max_size`. Every failure is logged and leaves the
  -- rule untouched.
  -- @param rule scanner rule (reads `upstreams`, writes `max_size`)
  -- @param cfg configuration object handed to the HTTP client and debug log
  -- @param ev_base event base handed to the HTTP client; also the log context
  local function cloudmark_preload(rule, cfg, ev_base, _)
    local upstream = rule.upstreams:get_upstream_round_robin()
    if not upstream then
      rspamd_logger.errx(ev_base, 'no upstream available when getting max message size')
      return
    end
    local addr = upstream:get_addr()

    local function max_message_size_cb(http_err, code, body, _)
      if http_err then
        rspamd_logger.errx(ev_base, 'HTTP error when getting max message size: %s',
            http_err)
        return
      end
      if code ~= 200 then
        rspamd_logger.errx(ev_base, 'bad HTTP code when getting max message size: %s', code)
        -- An error reply must not be interpreted as a size announcement
        return
      end
      local parser = ucl.parser()
      local ret, err = parser:parse_string(body)
      if not ret then
        rspamd_logger.errx(ev_base, 'could not parse response body [%s]: %s', body, err)
        return
      end
      local obj = parser:get_object()
      -- The parsed value may be a scalar; only a table can carry the field
      local ms = type(obj) == 'table' and obj.maxMessageSize or nil
      if not ms then
        rspamd_logger.errx(ev_base, 'missing maxMessageSize in the response body (JSON): %s', obj)
        return
      end
      rule.max_size = ms
      lua_util.debugm(N, cfg, 'set maximum message size set to %s bytes', ms)
    end

    http.request({
      ev_base = ev_base,
      config = cfg,
      url = cloudmark_url(rule, addr, '/score/v2/max-message-size'),
      callback = max_message_size_cb,
    })
  end
  --- Build the runtime configuration of the Cloudmark scanner.
  -- Merges `opts` over the built-in defaults, derives `prefix` and
  -- `log_prefix` when they are absent, and creates the upstream list.
  -- @param opts user supplied options
  -- @return the configuration table, or nil when no servers are defined or
  -- the server list cannot be turned into upstreams
  local function cloudmark_config(opts)
    local defaults = {
      name = N,
      default_port = 2713,
      url = '/score/v2/message',
      use_https = false,
      timeout = 5.0,
      log_clean = false,
      retransmits = 1,
      score_threshold = 90, -- minimum score to considerate reply
      message = '${SCANNER}: spam message found: "${VIRUS}"',
      max_message = 0,
      detection_category = "hash",
      default_score = 1,
      action = false,
      log_spamcause = true,
      symbol_fail = 'CLOUDMARK_FAIL',
      symbol = 'CLOUDMARK_CHECK',
      symbol_spam = 'CLOUDMARK_SPAM',
      add_score_header = false, -- Add X-CMAE-Score header
      add_headers = false, -- allow addition of the headers from Cloudmark
      scores_symbols = nil, -- a table with match { [score_threshold] = symbol, ... }
    }

    local conf = lua_util.override_defaults(defaults, opts)

    conf.prefix = conf.prefix or ('rs_' .. conf.name .. '_')

    if not conf.log_prefix then
      local same_name = conf.name:lower() == conf.type:lower()
      if same_name then
        conf.log_prefix = conf.name
      else
        conf.log_prefix = conf.name .. ' (' .. conf.type .. ')'
      end
    end

    -- `socket` serves as a fallback when `servers` is not given
    if not conf.servers and conf.socket then
      conf.servers = conf.socket
    end

    if not conf.servers then
      rspamd_logger.errx(rspamd_config, 'no servers defined')
      return nil
    end

    conf.upstreams = upstream_list.create(rspamd_config,
        conf.servers,
        conf.default_port)

    if not conf.upstreams then
      rspamd_logger.errx(rspamd_config, 'cannot parse servers %s',
          conf.servers)
      return nil
    end

    conf.symbols = { { symbol = conf.symbol_spam, score = 5.0 } }
    conf.preloads = { cloudmark_preload }
    lua_util.add_debug_alias('external_services', conf.name)
    return conf
  end
  --- Converts a key-value map to a list of chunks forming a multipart body.
  -- Keys are the form field names. Each value is either a plain string (used
  -- as the part data) or a table with the following fields:
  -- `data`: data of the part (parts without data are skipped)
  -- `filename`: optional filename
  -- `content-type`: content type of the element (optional, `text/plain` by default)
  -- `content-transfer-encoding`: optional CTE header (`binary` by default)
  -- The capitalised spellings `Content-Type` and `Content-Transfer-Encoding`
  -- are accepted as well. Values of any other type are ignored.
  -- @param tbl map of field name to part description
  -- @param boundary multipart boundary string (without leading dashes)
  -- @return array of chunks; empty when no part carried data
  local function table_to_multipart_body(tbl, boundary)
    local out = {}

    for name, part in pairs(tbl) do
      local part_type = type(part)
      if part_type == 'string' then
        -- Shorthand: a bare string is the data of a default text part
        part = { data = part }
      elseif part_type ~= 'table' then
        -- Indexing numbers/booleans would raise, so skip them explicitly
        part = nil
      end

      if part and part.data then
        out[#out + 1] = string.format('--%s\r\n', boundary)

        if part.filename then
          out[#out + 1] = string.format(
              'Content-Disposition: form-data; name="%s"; filename="%s"\r\n',
              name, part.filename)
        else
          out[#out + 1] = string.format(
              'Content-Disposition: form-data; name="%s"\r\n', name)
        end

        local ctype = part['content-type'] or part['Content-Type']
        if ctype then
          out[#out + 1] = string.format('Content-Type: %s\r\n', ctype)
        else
          out[#out + 1] = 'Content-Type: text/plain\r\n'
        end

        local cte = part['content-transfer-encoding']
            or part['Content-Transfer-Encoding']
        if cte then
          out[#out + 1] = string.format('Content-Transfer-Encoding: %s\r\n', cte)
        else
          out[#out + 1] = 'Content-Transfer-Encoding: binary\r\n'
        end

        out[#out + 1] = '\r\n'
        out[#out + 1] = part.data
        out[#out + 1] = '\r\n'
      end
    end

    -- The closing delimiter is only emitted when at least one part exists
    if #out > 0 then
      out[#out + 1] = string.format('--%s--\r\n', boundary)
    end

    return out
  end
  --- Select the symbol with the highest threshold not exceeding `score`.
  -- Threshold keys may be numbers or numeric strings; keys that cannot be
  -- converted to a number are ignored instead of raising a comparison error.
  -- @param scores_symbols map { [score_threshold] = symbol, ... }
  -- @param score numeric score to match
  -- @return the matching symbol, or nil when no threshold applies
  local function get_specific_symbol(scores_symbols, score)
    local selected
    local sel_thr = -1

    for threshold, sym in pairs(scores_symbols) do
      local thr = tonumber(threshold)
      if thr and sel_thr < thr and thr <= score then
        selected = sym
        sel_thr = thr
      end
    end

    return selected
  end
  -- Load-time self-checks for get_specific_symbol: each case lists the
  -- threshold map, the score and the symbol expected back (nil when absent).
  do
    local single = { [90] = 'CLOUDMARK_SPAM' }
    local double = { [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }
    local cases = {
      { map = single, score = 100, want = 'CLOUDMARK_SPAM' },
      { map = single, score = 80 },
      { map = double, score = 100, want = 'CLOUDMARK_SPAM' },
      { map = double, score = 80, want = 'CLOUDMARK_SPAM2' },
      { map = double, score = 70 },
      { map = double, score = 90, want = 'CLOUDMARK_SPAM' },
      { map = {}, score = 80 },
      { map = { [100] = 'CLOUDMARK_SPAM' }, score = 100, want = 'CLOUDMARK_SPAM' },
      { map = { [0] = 'CLOUDMARK_SPAM' }, score = 0, want = 'CLOUDMARK_SPAM' },
    }
    for _, case in ipairs(cases) do
      assert(get_specific_symbol(case.map, case.score) == case.want)
    end
  end
  --- Parse a Cloudmark scoring reply and apply its results to the task.
  -- Inserts `rule.symbol_fail` when the body cannot be parsed or lacks a
  -- score. Otherwise it inserts `rule.symbol_spam` when the score reaches
  -- `rule.score_threshold`, optionally adds the headers requested by
  -- Cloudmark and the `X-CMAE-Score` header, and inserts the symbol selected
  -- from `rule.scores_symbols`, if any.
  -- @param task task the results are attached to
  -- @param rule scanner rule
  -- @param body raw HTTP response body
  local function parse_cloudmark_reply(task, rule, body)
    local parser = ucl.parser()
    local ret, err = parser:parse_string(body)
    if not ret then
      rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
      -- tostring guards the concatenation should no error text be supplied
      task:insert_result(rule.symbol_fail, 1.0, 'Parser error: ' .. tostring(err))
      return
    end

    local obj = parser:get_object()
    lua_util.debugm(N, task, 'cloudmark reply is: %s', obj)

    -- A scalar JSON value parses fine but cannot carry a score; indexing a
    -- number would raise, so treat every non-table as "no score"
    if type(obj) ~= 'table' or not obj.score then
      rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
      task:insert_result(rule.symbol_fail, 1.0, 'Parser error: no score')
      return
    end

    if obj.analysis then
      -- Report analysis string
      rspamd_logger.infox(task, 'cloudmark report string: %s', obj.analysis)
    end

    local score = tonumber(obj.score) or 0
    if score >= rule.score_threshold then
      task:insert_result(rule.symbol_spam, 1.0, tostring(score))
    end

    if rule.add_headers and type(obj.appendHeaders) == 'table' then
      local headers_add = {}
      for _, h in ipairs(obj.appendHeaders) do
        -- Entries without a header name cannot be used as a table key
        if type(h) == 'table' and h.headerField then
          headers_add[h.headerField] = {
            order = 1, value = h.body
          }
        end
      end
      lua_mime.modify_headers(task, {
        add = headers_add
      })
    end

    if rule.add_score_header then
      lua_mime.modify_headers(task, {
        add = {
          ['X-CMAE-Score'] = {
            order = 1,
            value = tostring(score)
          }
        }
      })
    end

    if type(rule.scores_symbols) == 'table' then
      local sym = get_specific_symbol(rule.scores_symbols, score)
      if sym then
        task:insert_result(sym, 1.0, tostring(score))
      end
    end
  end
  --- Scan a message with Cloudmark.
  -- Builds a multipart/form-data request from the task (raw message, HELO,
  -- SMTP sender and recipients, client IP and hostname), posts it to an
  -- upstream and hands a 200 reply to `parse_cloudmark_reply`. On an HTTP
  -- error the upstream is marked failed and the request is retried on
  -- another upstream up to `rule.retransmits` times.
  -- @param task task being scanned
  -- @param content passed through to `common.condition_check_and_continue`
  -- @param digest passed through to `common.condition_check_and_continue`
  -- @param rule scanner rule as produced by `cloudmark_config`
  -- @param maybe_part passed through to `common.condition_check_and_continue`
  local function cloudmark_check(task, content, digest, rule, maybe_part)
    local function cloudmark_check_uncached()
      local upstream = rule.upstreams:get_upstream_round_robin()
      local addr = upstream:get_addr()
      local retransmits = rule.retransmits
      local url = cloudmark_url(rule, addr)

      local message_data = task:get_content()
      if rule.max_message and rule.max_message > 0 and #message_data > rule.max_message then
        task:insert_result(rule['symbol_fail'], 0.0, 'Message too large: ' .. #message_data)
        return
      end

      -- Every field must be a table with `data`: table_to_multipart_body
      -- skips anything else and reads the lower-case `content-type` key
      local request = {
        rfc822 = {
          ['content-type'] = 'message/rfc822',
          data = message_data,
        }
      }

      local helo = task:get_helo()
      if helo then
        request['heloDomain'] = {
          data = helo,
        }
      end

      local mail_from = task:get_from('smtp') or {}
      if mail_from[1] and #mail_from[1].addr > 1 then
        request['mailFrom'] = {
          data = mail_from[1].addr
        }
      end

      local rcpt_to = task:get_recipients('smtp')
      if rcpt_to then
        request['rcptTo'] = {
          data = table.concat(fun.totable(fun.map(function(r)
            return r.addr
          end, rcpt_to)), ',')
        }
      end

      local fip = task:get_from_ip()
      if fip and fip:is_valid() then
        request['connIp'] = {
          data = tostring(fip)
        }
      end

      local hostname = task:get_hostname()
      if hostname then
        request['fromHost'] = {
          data = hostname
        }
      end

      local request_data = {
        task = task,
        url = url,
        body = table_to_multipart_body(request, static_boundary),
        headers = {
          ['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary)
        },
        timeout = rule.timeout,
      }

      local function cloudmark_callback(http_err, code, body, headers)
        local function cloudmark_requery()
          -- set current upstream to fail because an error occurred
          -- (done exactly once per failed attempt)
          upstream:fail()

          -- retry with another upstream until retransmits exceeds
          if retransmits > 0 then
            retransmits = retransmits - 1

            lua_util.debugm(rule.name, task,
                '%s: request Error: %s - retries left: %s',
                rule.log_prefix, http_err, retransmits)

            -- Select a different upstream!
            upstream = rule.upstreams:get_upstream_round_robin()
            addr = upstream:get_addr()
            url = cloudmark_url(rule, addr)

            lua_util.debugm(rule.name, task, '%s: retry IP: %s:%s',
                rule.log_prefix, addr, addr:get_port())
            request_data.url = url

            http.request(request_data)
          else
            rspamd_logger.errx(task, '%s: failed to scan, maximum retransmits ' ..
                'exceed', rule.log_prefix)
            task:insert_result(rule['symbol_fail'], 0.0, 'failed to scan and ' ..
                'retransmits exceed')
          end
        end

        if http_err then
          cloudmark_requery()
        else
          -- Parse the response
          if upstream then
            upstream:ok()
          end
          if code ~= 200 then
            rspamd_logger.errx(task, 'invalid HTTP code: %s, body: %s, headers: %s', code, body, headers)
            task:insert_result(rule.symbol_fail, 1.0, 'Bad HTTP code: ' .. code)
            return
          end
          parse_cloudmark_reply(task, rule, body)
        end
      end

      request_data.callback = cloudmark_callback
      http.request(request_data)
    end

    if common.condition_check_and_continue(task, content, rule, digest,
        cloudmark_check_uncached, maybe_part) then
      return
    else
      cloudmark_check_uncached()
    end
  end
  -- Module export: scanner descriptor with its type tags, description,
  -- configuration builder, check entry point and name.
  local exports = {
    name = N,
    type = { 'cloudmark', 'scanner' },
    description = 'Cloudmark cartridge interface',
    configure = cloudmark_config,
    check = cloudmark_check,
  }

  return exports