You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

mime_types.lua 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689
  1. --[[
  2. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  -- When the global `confighelp` is set, stop here: nothing below
  -- (settings, callbacks, symbol registration) is evaluated.
  if confighelp then return end
  16. -- This plugin implements mime types checks for mail messages
  17. local logger = require "rspamd_logger"
  18. local lua_util = require "lua_util"
  19. local rspamd_util = require "rspamd_util"
  20. local lua_maps = require "lua_maps"
  21. local lua_mime_types = require "lua_mime_types"
  22. local lua_magic_types = require "lua_magic/types"
  23. local fun = require "fun"
  24. local N = "mime_types"
  -- Default plugin configuration. Values present in the `mime_types` options
  -- section replace the corresponding keys wholesale when options are loaded.
  local settings = {
    file = '',

    -- Names of the symbols inserted by the callback
    symbol_unknown = 'MIME_UNKNOWN',
    symbol_bad = 'MIME_BAD',
    symbol_good = 'MIME_GOOD',
    symbol_attachment = 'MIME_BAD_ATTACHMENT',
    symbol_encrypted_archive = 'MIME_ENCRYPTED_ARCHIVE',
    symbol_obfuscated_archive = 'MIME_OBFUSCATED_ARCHIVE',
    symbol_exe_in_gen_split_rar = 'MIME_EXE_IN_GEN_SPLIT_RAR',
    symbol_archive_in_archive = 'MIME_ARCHIVE_IN_ARCHIVE',
    symbol_double_extension = 'MIME_DOUBLE_BAD_EXTENSION',
    symbol_bad_extension = 'MIME_BAD_EXTENSION',
    symbol_bad_unicode = 'MIME_BAD_UNICODE',

    -- When true the content-type map is loaded as a 'regexp' map
    regexp = false,

    -- extension -> expected mime type
    extension_map = {
      html = 'text/html',
      htm = 'text/html',
      txt = 'text/plain',
      pdf = 'application/pdf'
    },

    -- extension -> score multiplier used for the bad/double extension symbols
    bad_extensions = {
      bat = 2, com = 2, exe = 1, iso = 4, jar = 2, lnk = 4, scr = 4,
      -- Have you ever seen that in legit email?
      ace = 4, arj = 2, cab = 3,
      -- Additional bad extensions from Gmail
      ade = 2, adp = 2, chm = 2, cmd = 2, cpl = 2, ins = 2, isp = 2,
      js = 2, jse = 2, lib = 2, mde = 2, msc = 2, msi = 2, msp = 2,
      mst = 2, nsh = 2, pif = 2, sct = 2, shb = 2, sys = 2, vb = 2,
      vbe = 2, vbs = 2, vxd = 2, wsc = 2, wsh = 2,
      -- Additional bad extensions from Outlook
      app = 2, asp = 2, bas = 2, cnt = 2, csh = 2, diagcab = 2, fxp = 2,
      gadget = 2, grp = 2, hlp = 2, hpj = 2, inf = 2, its = 2, jnlp = 2,
      ksh = 2, mad = 2, maf = 2, mag = 2, mam = 2, maq = 2, mar = 2,
      mas = 2, mat = 2, mau = 2, mav = 2, maw = 2, mcf = 2, mda = 2,
      mdb = 2, mdt = 2, mdw = 2, mdz = 2, msh = 2, msh1 = 2, msh2 = 2,
      mshxml = 2, msh1xml = 2, msh2xml = 2, msu = 2, ops = 2, osd = 2,
      pcd = 2, pl = 2, plg = 2, prf = 2, prg = 2, printerexport = 2,
      ps1 = 2, ps1xml = 2, ps2 = 2, ps2xml = 2, psc1 = 2, psc2 = 2,
      psd1 = 2, psdm1 = 2, pst = 2, reg = 2, scf = 2, shs = 2, theme = 2,
      url = 2, vbp = 2, vsmacros = 2, vsw = 2, webpnp = 2, website = 2,
      ws = 2, xbap = 2, xll = 2, xnk = 2,
    },

    -- Something that should not be in archive
    bad_archive_extensions = {
      pptx = 0.1, docx = 0.1, xlsx = 0.1, pdf = 0.1,
      jar = 3,
      js = 0.5,
      vbs = 4, wsf = 4, hta = 4,
    },

    -- Extensions treated as archives (archive-in-archive / cloaking checks)
    archive_extensions = {
      zip = 1, arj = 1, rar = 1, ace = 1, ['7z'] = 1, cab = 1,
      bz2 = 1, egg = 1, alz = 1, xz = 1, lz = 1,
    },

    -- Not really archives
    archive_exceptions = {
      odt = true, ods = true, odp = true,
      docx = true, xlsx = true, pptx = true, vsdx = true,
      -- jar = true,
    },

    -- Multiplier for full extension_map mismatch
    other_extensions_mult = 0.4,
  }

  -- Content-type map handle; assigned during option loading
  local map = nil
  --- Symbol callback: walks every MIME part of the message and inserts the
  -- mime_types symbols (unknown/bad/good content type, bad or double
  -- extensions, archive anomalies) together with MIME_TRACE markers.
  -- @param task task object; this function uses its get_parts, cache_get and
  --   insert_result methods
  local function check_mime_type(task)
    -- Inserts a zero-score MIME_TRACE result of the form "<part id>:<mark>"
    local function insert_trace(part, mark)
      task:insert_result('MIME_TRACE', 0.0,
          string.format("%s:%s", part:get_id(), mark))
    end

    -- Splits a file name on '.' and returns the last extension, the one
    -- before it (both lowercased; nil when there are not enough components)
    -- and the raw list of components.
    local function gen_extension(fname)
      local parts = lua_util.str_split(fname or '', '.')
      local ext = {}
      for n = 1, 2 do
        ext[n] = #parts > n and string.lower(parts[#parts + 1 - n]) or nil
      end
      return ext[1], ext[2], parts
    end

    -- Per-task extension tables taken from the cached 'settings' object.
    -- They are built lazily on first use and then reused for every file name
    -- checked within this task, instead of being rebuilt (and logged) for
    -- each archive member.
    local extra_table, extra_archive_table
    local function load_user_tables()
      if extra_table then return end
      extra_table = {}
      extra_archive_table = {}

      local user_settings = task:cache_get('settings')
      if user_settings and user_settings.plugins then
        user_settings = user_settings.plugins.mime_types
      end
      if not user_settings then return end

      logger.infox(task, 'using special tables from user settings')

      -- A plain list of extensions is converted to an extension -> 1.0 map;
      -- anything else is used as is.
      local function as_score_map(tbl)
        if tbl[1] then
          return fun.tomap(fun.map(function(e) return e, 1.0 end, tbl))
        end
        return tbl
      end

      if user_settings.bad_extensions then
        extra_table = as_score_map(user_settings.bad_extensions)
      end
      if user_settings.bad_archive_extensions then
        extra_archive_table = as_score_map(user_settings.bad_archive_extensions)
      end
    end

    -- Checks one file name (an attachment name or a name listed inside an
    -- archive) against the extension tables and the extension -> content
    -- type map.
    --   fname        file name to inspect
    --   ct           declared content type (nil for archive members)
    --   is_archive   true when fname comes from an archive listing
    --   part         MIME part used for MIME_TRACE ids
    --   detected_ext extension reported by content detection, may be nil
    --   nfiles       number of files in the enclosing archive (1 otherwise)
    local function check_filename(fname, ct, is_archive, part, detected_ext, nfiles)
      lua_util.debugm(N, task, "check filename: %s, ct=%s, is_archive=%s, detected_ext=%s, nfiles=%s",
          fname, ct, is_archive, detected_ext, nfiles)

      local has_bad_unicode, char, ch_pos = rspamd_util.has_obscured_unicode(fname)
      if has_bad_unicode then
        -- Fixed format: the previous "0x%xd" appended a literal 'd' that
        -- reads as one more hex digit of the code point.
        task:insert_result(settings.symbol_bad_unicode, 1.0,
            string.format("0x%x after %s", char,
                fname:sub(1, ch_pos)))
      end

      -- Decode hex encoded characters
      fname = string.gsub(fname, '%%(%x%x)',
          function(hex) return string.char(tonumber(hex, 16)) end)

      -- Replace potentially bad characters with '?'
      fname = fname:gsub('[^%s%g]', '?')

      -- Check file is in filename whitelist
      if settings.filename_whitelist and
          settings.filename_whitelist:get_key(fname) then
        logger.debugm("mime_types", task, "skip checking of %s - file is in filename whitelist",
            fname)
        return
      end

      -- ext is the last extension, LOWERCASED
      -- ext2 is the one before last extension LOWERCASED
      local ext, ext2, parts = gen_extension(fname)

      local detected
      if not is_archive and detected_ext then
        detected = lua_magic_types[detected_ext]
      end

      -- Guard on `detected` rather than `detected_ext`: a detected extension
      -- without an entry in lua_magic_types used to raise on `detected.ct`.
      if detected and ((not ext) or ext ~= detected_ext) then
        -- Try to find extension by real content type
        check_filename('detected.' .. detected_ext, detected.ct,
            false, part, nil, 1)
      end

      if not ext then return end

      -- Inserts the double-extension or bad-extension symbol depending on
      -- the multipliers found for ext (badness_mult) and ext2 (badness_mult2).
      local function check_extension(badness_mult, badness_mult2)
        if not badness_mult and not badness_mult2 then return end

        if #parts > 2 then
          -- We need to ensure that next-to-last extension is an extension,
          -- so we check for its length and if it is not a number or date
          if #ext2 > 0 and #ext2 <= 4 and not string.match(ext2, '^%d+[%]%)]?$') then
            -- Use the greatest badness multiplier
            if not badness_mult or
                (badness_mult2 and badness_mult < badness_mult2) then
              badness_mult = badness_mult2
            end
            -- Double extension + bad extension == VERY bad
            task:insert_result(settings['symbol_double_extension'], badness_mult,
                string.format(".%s.%s", ext2, ext))
            insert_trace(part, '-')
            return
          end
        end

        if badness_mult then
          -- Just bad extension
          task:insert_result(settings['symbol_bad_extension'], badness_mult, ext)
          insert_trace(part, '-')
        end
      end

      -- Process settings (once per task)
      load_user_tables()

      -- Looks up the multiplier for extension `e`; user supplied tables take
      -- precedence, archive-only defaults apply when nfiles < 2.
      local function check_tables(e)
        if is_archive then
          return extra_archive_table[e] or (nfiles < 2 and settings.bad_archive_extensions[e]) or
              extra_table[e] or settings.bad_extensions[e]
        end
        return extra_table[e] or settings.bad_extensions[e]
      end

      local score1 = check_tables(ext)
      local score2 = nil
      if ext2 then
        score2 = check_tables(ext2)
      end
      check_extension(score1, score2)

      if is_archive then
        -- Also check for archive bad extension
        if settings['archive_extensions'][ext] then
          -- Archive in archive
          task:insert_result(settings['symbol_archive_in_archive'], 1.0, ext)
          insert_trace(part, '-')
        end
      elseif ext2 then
        -- Check for archive cloaking like .zip.gz
        if settings['archive_extensions'][ext2]
            -- Exclude multipart archive extensions, e.g. .zip.001
            and not string.match(ext, '^%d+$')
        then
          task:insert_result(settings['symbol_archive_in_archive'],
              1.0, string.format(".%s.%s", ext2, ext))
          insert_trace(part, '-')
        end
      end

      -- Compare the declared content type with the types expected for ext
      local mt = settings['extension_map'][ext]
      if mt and ct and ct ~= 'application/octet-stream' then
        local found
        local mult
        for _, v in ipairs(mt) do
          mult = v.mult
          if ct == v.ct then
            found = true
            break
          end
        end

        if not found then
          task:insert_result(settings['symbol_attachment'], mult, string.format('%s:%s',
              ext, ct))
        end
      end
    end

    local parts = task:get_parts()
    if not parts then return end

    for _, p in ipairs(parts) do
      local mtype, subtype = p:get_type()

      if not mtype then
        lua_util.debugm(N, task, "no content type for part: %s", p:get_id())
        task:insert_result(settings['symbol_unknown'], 1.0, 'missing content type')
        insert_trace(p, '~')
      else
        -- Check for attachment
        local filename = p:get_filename()
        local ct = string.format('%s/%s', mtype, subtype):lower()
        local detected_ext = p:get_detected_ext()

        if filename then
          check_filename(filename, ct, false, p, detected_ext, 1)
        end

        if p:is_archive() then
          local check = true

          if detected_ext then
            local detected_type = lua_magic_types[detected_ext]
            -- An extension unknown to lua_magic_types used to raise here;
            -- in that case the archive checks are kept enabled.
            if detected_type and detected_type.type ~= 'archive' then
              -- The message has three placeholders; detected_ext was missing
              logger.debugm("mime_types", task, "skip checking of %s as archive, %s is not archive but %s",
                  filename, detected_ext, detected_type.type)
              check = false
            end
          end

          if check and filename then
            local ext = gen_extension(filename)
            if ext and settings.archive_exceptions[ext] then
              check = false
              logger.debugm("mime_types", task, "skip checking of %s as archive, %s is whitelisted",
                  filename, ext)
            end
          end

          local arch = p:get_archive()

          -- TODO: migrate to flags once C part is ready
          if arch:is_encrypted() then
            task:insert_result(settings.symbol_encrypted_archive, 1.0, filename)
            insert_trace(p, '-')
          elseif arch:is_unreadable() then
            task:insert_result(settings.symbol_encrypted_archive, 0.5, {
              'compressed header',
              filename,
            })
            insert_trace(p, '-')
          elseif arch:is_obfuscated() then
            task:insert_result(settings.symbol_obfuscated_archive, 1.0, {
              'obfuscated archive',
              filename,
            })
            insert_trace(p, '-')
          end

          if check then
            -- True for a rar archive whose file name ends in three digits
            local is_gen_split_rar = false
            if filename then
              local ext = gen_extension(filename)
              is_gen_split_rar = ext and (string.match(ext, '^%d%d%d$')) and (arch:get_type() == 'rar')
            end

            local fl = arch:get_files_full(1000)
            local nfiles = #fl

            for _, f in ipairs(fl) do
              if f['encrypted'] then
                task:insert_result(settings['symbol_encrypted_archive'],
                    1.0, f['name'])
                insert_trace(p, '-')
              end

              if f['name'] then
                if is_gen_split_rar and (gen_extension(f['name']) or '') == 'exe' then
                  task:insert_result(settings['symbol_exe_in_gen_split_rar'], 1.0, f['name'])
                else
                  check_filename(f['name'], nil,
                      true, p, nil, nfiles)
                end
              end
            end

            if nfiles == 1 and fl[1].name then
              -- We check that extension of the file inside archive is
              -- the same as double extension of the file
              local _, ext2 = gen_extension(filename)

              if ext2 and #ext2 > 0 then
                local enc_ext = gen_extension(fl[1].name)

                if enc_ext
                    and settings['bad_extensions'][enc_ext]
                    and not tonumber(ext2)
                    and enc_ext ~= ext2 then
                  task:insert_result(settings['symbol_double_extension'], 2.0,
                      string.format("%s!=%s", ext2, enc_ext))
                end
              end
            end
          end
        end

        if map then
          local v = map:get_key(ct)
          local detected_different = false

          local detected_type
          if detected_ext then
            detected_type = lua_magic_types[detected_ext]
          end

          if detected_type and detected_type.ct ~= ct then
            local v_detected = map:get_key(detected_type.ct)
            if not v then
              v = v_detected
            elseif v_detected then
              -- Pick the greater score. Compare numerically when both values
              -- are numeric: a plain `>` on string values is lexicographic
              -- ("10" > "9" is false). Non-numeric values keep the original
              -- comparison.
              local n_declared, n_detected = tonumber(v), tonumber(v_detected)
              if n_declared and n_detected then
                if n_detected > n_declared then v = v_detected end
              elseif v_detected > v then
                v = v_detected
              end
            end
            detected_different = true
          end

          if v then
            local n = tonumber(v)

            if n then
              if n > 0 then
                if detected_different then
                  -- Penalize case
                  n = n * 1.5
                  task:insert_result(settings['symbol_bad'], n,
                      string.format('%s:%s', ct, detected_type.ct))
                else
                  task:insert_result(settings['symbol_bad'], n, ct)
                end
                insert_trace(p, '-')
              elseif n < 0 then
                task:insert_result(settings['symbol_good'], -n, ct)
                insert_trace(p, '+')
              else
                -- Neutral content type
                insert_trace(p, '~')
              end
            else
              logger.warnx(task, 'unknown value: "%s" for content type %s in the map',
                  v, ct)
            end
          else
            task:insert_result(settings['symbol_unknown'], 1.0, ct)
            insert_trace(p, '~')
          end
        end
      end
    end
  end
  -- Load the `mime_types` options, normalise the extension map and register
  -- the callback with its virtual symbols. Nothing happens when the options
  -- section is absent.
  local opts = rspamd_config:get_all_opt('mime_types')

  if opts then
    -- Option values replace the defaults key by key
    for key, value in pairs(opts) do
      settings[key] = value
    end

    settings.filename_whitelist = lua_maps.rspamd_map_add('mime_types', 'filename_whitelist', 'regexp',
        'filename whitelist')

    -- Stores extension_map[ext] as a list of { ct = ..., mult = ... } records;
    -- `ct` is either one content type or a list of them.
    local function change_extension_map_entry(ext, ct, mult)
      local entries = {}

      if type(ct) == 'table' then
        for _, elt in ipairs(ct) do
          entries[#entries + 1] = { ct = elt, mult = mult }
        end
      else
        entries[1] = { ct = ct, mult = mult }
      end

      settings.extension_map[ext] = entries
    end

    -- Transform extension_map (existing keys only are reassigned)
    for ext, ct in pairs(settings.extension_map) do
      change_extension_map_entry(ext, ct, 1.0)
    end

    -- Add all extensions that are not configured explicitly
    for _, pair in ipairs(lua_mime_types.full_extensions_map) do
      local ext, ct = pair[1], pair[2]
      if not settings.extension_map[ext] then
        change_extension_map_entry(ext, ct, settings.other_extensions_mult)
      end
    end

    local map_type = settings['regexp'] and 'regexp' or 'map'
    map = lua_maps.rspamd_map_add('mime_types', 'file', map_type,
        'mime types map')

    if not map then
      lua_util.disable_module(N, "config")
    else
      local id = rspamd_config:register_symbol({
        name = 'MIME_TYPES_CALLBACK',
        callback = check_mime_type,
        type = 'callback',
        flags = 'nostat',
        group = 'mime_types',
      })

      -- Virtual symbols named after settings keys, in registration order
      local virtual_symbols = {
        { key = 'symbol_unknown' },
        { key = 'symbol_bad' },
        { key = 'symbol_good', flags = 'nice' },
        { key = 'symbol_attachment' },
        { key = 'symbol_encrypted_archive' },
        { key = 'symbol_obfuscated_archive' },
        { key = 'symbol_exe_in_gen_split_rar' },
        { key = 'symbol_archive_in_archive' },
        { key = 'symbol_double_extension' },
        { key = 'symbol_bad_extension' },
        { key = 'symbol_bad_unicode' },
      }

      for _, sym in ipairs(virtual_symbols) do
        rspamd_config:register_symbol({
          type = 'virtual',
          name = settings[sym.key],
          flags = sym.flags, -- nil leaves the field absent
          parent = id,
          group = 'mime_types',
        })
      end

      rspamd_config:register_symbol({
        type = 'virtual',
        name = 'MIME_TRACE',
        parent = id,
        group = 'mime_types',
        flags = 'nostat',
        score = 0,
      })
    end
  end