You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

cloudmark.lua 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. --[[
  2. Copyright (c) 2021, Alexander Moisseev <moiseev@mezonplus.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[[
  14. -- @module cloudmark
  15. -- This module contains Cloudmark v2 interface
  16. --]]
  17. local lua_util = require "lua_util"
  18. local http = require "rspamd_http"
  19. local upstream_list = require "rspamd_upstream_list"
  20. local rspamd_logger = require "rspamd_logger"
  21. local ucl = require "ucl"
  22. local rspamd_util = require "rspamd_util"
  23. local common = require "lua_scanners/common"
  24. local fun = require "fun"
  25. local lua_mime = require "lua_mime"
  -- Module name: used as the scanner name, the debug module tag and in log lines
  local N = "cloudmark"
  -- Boundary string for multipart request bodies; generated once at module load
  local static_boundary = rspamd_util.random_hex(32)
  --- Build the URL of a Cloudmark endpoint on a given upstream address.
  -- @param rule scanner rule; `use_https`, `default_port` and `url` are read
  -- @param addr address object providing `get_port()` and a `tostring` form
  -- @param maybe_url optional request path; `rule.url` is used when omitted
  -- @return URL string built as `<scheme>://<addr>:<port><path>`
  local function cloudmark_url(rule, addr, maybe_url)
    local path = maybe_url or rule.url

    -- A port of 0 reported by the address is replaced by the rule default
    local port = addr:get_port()
    if port == 0 then
      port = rule.default_port
    end

    local fmt = rule.use_https and 'https://%s:%d%s' or 'http://%s:%d%s'
    return string.format(fmt, tostring(addr), port, path)
  end
  --- Preload hook: ask a Cloudmark server for its maximum message size.
  -- Sends an asynchronous HTTP request to `/score/v2/max-message-size` on one
  -- upstream and, when the reply is a 200 with a `maxMessageSize` field, stores
  -- that value in `rule.max_size`. Every failure is logged and leaves
  -- `rule.max_size` untouched.
  -- NOTE(review): `rule.max_size` is not read in this file; presumably it is
  -- consumed by `lua_scanners/common` - confirm.
  -- @param rule scanner rule (reads `upstreams`, writes `max_size`)
  -- @param cfg configuration object passed to the HTTP client and debug logger
  -- @param ev_base event base used for the request and as the error log context
  local function cloudmark_preload(rule, cfg, ev_base, _)
    local upstream = rule.upstreams:get_upstream_round_robin()
    local addr = upstream:get_addr()

    local function max_message_size_cb(http_err, code, body, _)
      if http_err then
        rspamd_logger.errx(ev_base, 'HTTP error when getting max message size: %s',
          http_err)
        return
      end

      if code ~= 200 then
        rspamd_logger.errx(ev_base, 'bad HTTP code when getting max message size: %s', code)
        -- Previously execution fell through and tried to parse the body of
        -- the error reply; stop here instead
        return
      end

      local parser = ucl.parser()
      local ret, err = parser:parse_string(body)
      if not ret then
        rspamd_logger.errx(ev_base, 'could not parse response body [%s]: %s', body, err)
        return
      end

      local obj = parser:get_object()
      -- Guard against a reply that parses to something other than a table,
      -- which would make the field lookup raise
      local ms = type(obj) == 'table' and obj.maxMessageSize or nil
      if not ms then
        rspamd_logger.errx(ev_base, 'missing maxMessageSize in the response body (JSON): %s', obj)
        return
      end

      rule.max_size = ms
      lua_util.debugm(N, cfg, 'set maximum message size set to %s bytes', ms)
    end

    http.request({
      ev_base = ev_base,
      config = cfg,
      url = cloudmark_url(rule, addr, '/score/v2/max-message-size'),
      callback = max_message_size_cb,
    })
  end
  --- Build the scanner rule from user options merged over the module defaults.
  -- Fills in `prefix` and `log_prefix` when absent, accepts `socket` as a
  -- fallback for `servers`, creates the upstream list and registers the spam
  -- symbol and the preload hook.
  -- @param opts user-supplied options (must provide `type`, which is read here)
  -- @return the rule table, or nil when no servers are configured or the
  --         upstream list cannot be created from them
  local function cloudmark_config(opts)
    local defaults = {
      name = N,
      default_port = 2713,
      url = '/score/v2/message',
      use_https = false,
      timeout = 5.0,
      log_clean = false,
      retransmits = 1,
      score_threshold = 90, -- replies scoring below this are not reported as spam
      message = '${SCANNER}: spam message found: "${VIRUS}"',
      max_message = 0,
      detection_category = "hash",
      default_score = 1,
      action = false,
      log_spamcause = true,
      symbol_fail = 'CLOUDMARK_FAIL',
      symbol = 'CLOUDMARK_CHECK',
      symbol_spam = 'CLOUDMARK_SPAM',
      add_headers = false, -- when true, headers suggested by Cloudmark are added
    }

    local conf = lua_util.override_defaults(defaults, opts)

    conf.prefix = conf.prefix or ('rs_' .. conf.name .. '_')

    if not conf.log_prefix then
      -- Mention the type only when it differs from the name (case-insensitive)
      if conf.name:lower() ~= conf.type:lower() then
        conf.log_prefix = conf.name .. ' (' .. conf.type .. ')'
      else
        conf.log_prefix = conf.name
      end
    end

    -- `socket` acts as a fallback for `servers`
    conf.servers = conf.servers or conf.socket
    if not conf.servers then
      rspamd_logger.errx(rspamd_config, 'no servers defined')
      return nil
    end

    conf.upstreams = upstream_list.create(rspamd_config, conf.servers,
      conf.default_port)
    if not conf.upstreams then
      rspamd_logger.errx(rspamd_config, 'cannot parse servers %s', conf.servers)
      return nil
    end

    conf.symbols = { { symbol = conf.symbol_spam, score = 5.0 } }
    conf.preloads = { cloudmark_preload }
    lua_util.add_debug_alias('external_services', conf.name)
    return conf
  end
  --- Serialise a key-value map into the fragments of a multipart body.
  -- Each key of `tbl` becomes the `name` of one form part. A value is either a
  -- plain string (shorthand for `{ data = <string> }`) or a table with fields:
  --   `data`: content of the part (a part without it is skipped)
  --   `filename`: optional filename
  --   `content-type`: optional content type (`text/plain` when absent)
  --   `content-transfer-encoding`: optional CTE header (`binary` when absent)
  -- Values of any other type are ignored. Parts are emitted in `pairs` order,
  -- which is unspecified.
  -- @param tbl map of part name -> part description
  -- @param boundary multipart boundary (without the leading dashes)
  -- @return array of body fragments; empty when no part carried data
  local function table_to_multipart_body(tbl, boundary)
    local out = {}

    local function emit(fragment)
      out[#out + 1] = fragment
    end

    for name, value in pairs(tbl) do
      local part
      local value_type = type(value)
      if value_type == 'table' then
        part = value
      elseif value_type == 'string' then
        -- A bare string used to be dropped silently, because indexing a
        -- string with `.data` yields nil; treat it as the data of the part
        part = { data = value }
      end

      if part and part.data then
        emit(string.format('--%s\r\n', boundary))

        if part.filename then
          emit(string.format('Content-Disposition: form-data; name="%s"; filename="%s"\r\n',
            name, part.filename))
        else
          emit(string.format('Content-Disposition: form-data; name="%s"\r\n', name))
        end

        if part['content-type'] then
          emit(string.format('Content-Type: %s\r\n', part['content-type']))
        else
          emit('Content-Type: text/plain\r\n')
        end

        if part['content-transfer-encoding'] then
          emit(string.format('Content-Transfer-Encoding: %s\r\n',
            part['content-transfer-encoding']))
        else
          emit('Content-Transfer-Encoding: binary\r\n')
        end

        emit('\r\n')
        emit(part.data)
        emit('\r\n')
      end
    end

    -- The closing delimiter is only written when at least one part was emitted
    if #out > 0 then
      emit(string.format('--%s--\r\n', boundary))
    end

    return out
  end
  --- Interpret the body of a Cloudmark scoring reply and update the task.
  -- Inserts `rule.symbol_fail` when the body cannot be parsed or carries no
  -- `score`; inserts `rule.symbol_spam` when the score reaches
  -- `rule.score_threshold`; optionally adds the headers listed in
  -- `appendHeaders` when `rule.add_headers` is set.
  -- @param task task being scanned
  -- @param rule scanner rule
  -- @param body raw reply body
  local function parse_cloudmark_reply(task, rule, body)
    local parser = ucl.parser()
    local ret, err = parser:parse_string(body)
    if not ret then
      rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
      -- tostring() keeps the concatenation safe should no error text be given
      task:insert_result(rule.symbol_fail, 1.0, 'Parser error: ' .. tostring(err))
      return
    end

    local obj = parser:get_object()
    lua_util.debugm(N, task, 'cloudmark reply is: %s', obj)

    -- A reply that parses to something other than a table is handled like a
    -- reply without a score instead of raising on the field lookup
    if type(obj) ~= 'table' or not obj.score then
      rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
      task:insert_result(rule.symbol_fail, 1.0, 'Parser error: no score')
      return
    end

    if obj.analysis then
      -- Report analysis string
      rspamd_logger.infox(task, 'cloudmark report string: %s', obj.analysis)
    end

    -- A score that is not numeric counts as 0
    local score = tonumber(obj.score) or 0
    if score >= rule.score_threshold then
      task:insert_result(rule.symbol_spam, 1.0, tostring(score))
    end

    if rule.add_headers and type(obj.appendHeaders) == 'table' then
      local headers_add = {}
      for _, h in ipairs(obj.appendHeaders) do
        -- Skip malformed entries: a missing `headerField` would be a nil
        -- table key, and a missing `body` would add a header without a value
        if type(h) == 'table' and h.headerField and h.body then
          headers_add[h.headerField] = { order = 1, value = h.body }
        end
      end

      if next(headers_add) then
        lua_mime.modify_headers(task, {
          add = headers_add
        })
      end
    end
  end
  --- Scan a message with Cloudmark.
  -- Builds a multipart request from the raw message and the SMTP envelope
  -- (HELO, MAIL FROM, RCPT TO, client IP, client hostname), posts it to one
  -- upstream and hands a 200 reply to `parse_cloudmark_reply`. On an HTTP
  -- error the request is retried on another upstream up to `rule.retransmits`
  -- times; after that `rule.symbol_fail` is inserted.
  -- @param task task being scanned
  -- @param content content handed over by the scanner framework (the request
  --        itself always carries the full message from `task:get_content()`)
  -- @param digest content digest, forwarded to the common pre-check
  -- @param rule scanner rule produced by `cloudmark_config`
  -- @param maybe_part optional MIME part, forwarded to the common pre-check
  local function cloudmark_check(task, content, digest, rule, maybe_part)
    local function cloudmark_check_uncached()
      local upstream = rule.upstreams:get_upstream_round_robin()
      local addr = upstream:get_addr()
      local retransmits = rule.retransmits
      local url = cloudmark_url(rule, addr)
      local message_data = task:get_content()

      if rule.max_message and rule.max_message > 0 and #message_data > rule.max_message then
        task:insert_result(rule['symbol_fail'], 0.0, 'Message too large: ' .. #message_data)
        return
      end

      -- Every part is a table with a `data` field, which is the form
      -- `table_to_multipart_body` documents. The content type key must be the
      -- lowercase `content-type`: the previous `Content-Type` key was never
      -- read, so the message was sent as text/plain.
      local request = {
        rfc822 = {
          ['content-type'] = 'message/rfc822',
          data = message_data,
        }
      }

      local helo = task:get_helo()
      if helo then
        request['heloDomain'] = {
          data = helo,
        }
      end

      local mail_from = task:get_from('smtp') or {}
      if mail_from[1] and #mail_from[1].addr > 1 then
        request['mailFrom'] = {
          data = mail_from[1].addr
        }
      end

      local rcpt_to = task:get_recipients('smtp')
      -- Skip the field entirely rather than sending an empty recipient list
      if rcpt_to and rcpt_to[1] then
        request['rcptTo'] = {
          data = table.concat(fun.totable(fun.map(function(r) return r.addr end, rcpt_to)), ',')
        }
      end

      local fip = task:get_from_ip()
      if fip and fip:is_valid() then
        -- Wrapped in a table: a bare string has no `data` field and was
        -- therefore silently left out of the request
        request['connIp'] = {
          data = tostring(fip)
        }
      end

      local hostname = task:get_hostname()
      if hostname then
        -- Wrapped for the same reason as `connIp`
        request['fromHost'] = {
          data = hostname
        }
      end

      local request_data = {
        task = task,
        url = url,
        body = table_to_multipart_body(request, static_boundary),
        headers = {
          ['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary)
        },
        timeout = rule.timeout,
      }

      local function cloudmark_callback(http_err, code, body, headers)

        local function cloudmark_requery()
          -- set current upstream to fail because an error occurred
          upstream:fail()

          -- retry with another upstream until retransmits exceeds
          if retransmits > 0 then
            retransmits = retransmits - 1

            lua_util.debugm(rule.name, task,
              '%s: request Error: %s - retries left: %s',
              rule.log_prefix, http_err, retransmits)

            -- Select a different upstream!
            upstream = rule.upstreams:get_upstream_round_robin()
            addr = upstream:get_addr()
            url = cloudmark_url(rule, addr)

            lua_util.debugm(rule.name, task, '%s: retry IP: %s:%s',
              rule.log_prefix, addr, addr:get_port())
            request_data.url = url

            http.request(request_data)
          else
            rspamd_logger.errx(task, '%s: failed to scan, maximum retransmits ' ..
              'exceed', rule.log_prefix)
            task:insert_result(rule['symbol_fail'], 0.0, 'failed to scan and ' ..
              'retransmits exceed')
            -- The upstream was already marked as failed at the top of this
            -- function; the second fail() that used to be here counted the
            -- same error twice
          end
        end

        if http_err then
          cloudmark_requery()
        else
          -- Any HTTP reply counts as a live upstream, even a non-200 one
          if upstream then upstream:ok() end

          if code ~= 200 then
            rspamd_logger.errx(task, 'invalid HTTP code: %s, body: %s, headers: %s', code, body, headers)
            task:insert_result(rule.symbol_fail, 1.0, 'Bad HTTP code: ' .. code)
            return
          end

          parse_cloudmark_reply(task, rule, body)
        end
      end

      request_data.callback = cloudmark_callback
      http.request(request_data)
    end

    -- When the common pre-check returns true nothing more is done here; it is
    -- given `cloudmark_check_uncached` as a callback (NOTE(review): presumably
    -- to run it itself, e.g. after a cache lookup - confirm in
    -- lua_scanners/common). Otherwise the scan is started directly.
    if common.condition_check_and_continue(task, content, rule, digest,
        cloudmark_check_uncached, maybe_part) then
      return
    else
      cloudmark_check_uncached()
    end
  end
  -- Scanner descriptor returned by the module: identity, type tags and the
  -- configure/check entry points defined above
  return {
    name = N,
    description = 'Cloudmark cartridge interface',
    type = { 'cloudmark', 'scanner' },
    configure = cloudmark_config,
    check = cloudmark_check,
  }