You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

mime_types.lua 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687
  1. --[[
  2. Copyright (c) 2016, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  -- Nothing to set up when the global `confighelp` is set: leave the chunk
  -- before any configuration is read or any symbol is registered.
  if confighelp then return end
  16. -- This plugin implements mime types checks for mail messages
  17. local logger = require "rspamd_logger"
  18. local lua_util = require "lua_util"
  19. local rspamd_util = require "rspamd_util"
  20. local lua_maps = require "lua_maps"
  21. local lua_mime_types = require "lua_mime_types"
  22. local lua_magic_types = require "lua_magic/types"
  23. local fun = require "fun"
  24. local N = "mime_types"
  -- Default plugin settings: symbol names plus the extension tables used by
  -- the filename checks.  Values are either scores or plain flags.
  local settings
  do
    -- Give every extension listed in `names` the same `score` inside `into`
    local function score_all(into, score, names)
      for _, name in ipairs(names) do
        into[name] = score
      end
      return into
    end

    -- Extensions with individually chosen scores
    local bad_extensions = {
      bat = 2, com = 2, exe = 1, iso = 4, jar = 2, lnk = 4, scr = 4,
      -- Have you ever seen that in legit email?
      ace = 4, arj = 2, cab = 3,
    }

    -- Additional bad extensions from Gmail
    score_all(bad_extensions, 2, {
      'ade', 'adp', 'chm', 'cmd', 'cpl', 'ins', 'isp', 'js', 'jse', 'lib',
      'mde', 'msc', 'msi', 'msp', 'mst', 'nsh', 'pif', 'sct', 'shb', 'sys',
      'vb', 'vbe', 'vbs', 'vxd', 'wsc', 'wsh',
    })

    -- Additional bad extensions from Outlook
    score_all(bad_extensions, 2, {
      'app', 'asp', 'bas', 'cnt', 'csh', 'diagcab', 'fxp', 'gadget', 'grp',
      'hlp', 'hpj', 'inf', 'its', 'jnlp', 'ksh', 'mad', 'maf', 'mag', 'mam',
      'maq', 'mar', 'mas', 'mat', 'mau', 'mav', 'maw', 'mcf', 'mda', 'mdb',
      'mdt', 'mdw', 'mdz', 'msh', 'msh1', 'msh2', 'mshxml', 'msh1xml',
      'msh2xml', 'msu', 'ops', 'osd', 'pcd', 'pl', 'plg', 'prf', 'prg',
      'printerexport', 'ps1', 'ps1xml', 'ps2', 'ps2xml', 'psc1', 'psc2',
      'psd1', 'psdm1', 'pst', 'reg', 'scf', 'shs', 'theme', 'url', 'vbp',
      'vsmacros', 'vsw', 'webpnp', 'website', 'ws', 'xbap', 'xll', 'xnk',
    })

    settings = {
      file = '',
      symbol_unknown = 'MIME_UNKNOWN',
      symbol_bad = 'MIME_BAD',
      symbol_good = 'MIME_GOOD',
      symbol_attachment = 'MIME_BAD_ATTACHMENT',
      symbol_encrypted_archive = 'MIME_ENCRYPTED_ARCHIVE',
      symbol_obfuscated_archive = 'MIME_OBFUSCATED_ARCHIVE',
      symbol_exe_in_gen_split_rar = 'MIME_EXE_IN_GEN_SPLIT_RAR',
      symbol_archive_in_archive = 'MIME_ARCHIVE_IN_ARCHIVE',
      symbol_double_extension = 'MIME_DOUBLE_BAD_EXTENSION',
      symbol_bad_extension = 'MIME_BAD_EXTENSION',
      symbol_bad_unicode = 'MIME_BAD_UNICODE',
      regexp = false,

      -- extension -> mime_type
      extension_map = {
        html = 'text/html', htm = 'text/html',
        txt = 'text/plain',
        pdf = 'application/pdf',
      },

      bad_extensions = bad_extensions,

      -- Something that should not be in archive
      bad_archive_extensions = {
        pptx = 0.1, docx = 0.1, xlsx = 0.1, pdf = 0.1,
        jar = 3,
        js = 0.5,
        vbs = 4, wsf = 4, hta = 4,
      },

      archive_extensions = score_all({}, 1, {
        'zip', 'arj', 'rar', 'ace', '7z', 'cab', 'bz2', 'egg', 'alz', 'xz', 'lz',
      }),

      -- Not really archives
      archive_exceptions = {
        odt = true, ods = true, odp = true,
        docx = true, xlsx = true, pptx = true,
        vsdx = true,
        -- jar = true,
      },

      -- Multiplier for full extension_map mismatch
      other_extensions_mult = 0.4,
    }
  end
  -- Content type -> score map; stays nil until the configuration code
  -- assigns it, and check_mime_type skips the map lookup while it is nil
  local map = nil

  --- Symbol callback: inspects every MIME part of a task.
  -- For each part it checks the attachment filename (bad, double and
  -- mismatching extensions, obscured unicode), looks inside archives
  -- (encrypted/obfuscated archives, files stored within) and finally scores
  -- the declared content type against the `map`.
  -- @param task the task being processed
  local function check_mime_type(task)

    -- Adds the zero-weight MIME_TRACE option "<part id>:<mark>" for a part
    local function insert_trace(part, mark)
      task:insert_result('MIME_TRACE', 0.0,
          string.format("%s:%s", part:get_id(), mark))
    end

    --- Splits a filename on dots and extracts its extensions.
    -- @param fname filename (nil is treated as an empty string)
    -- @return last extension lowercased, or nil if the name has no dot
    -- @return next-to-last extension lowercased, or nil if there is none
    -- @return the list of dot-separated components
    local function gen_extension(fname)
      local parts = lua_util.str_split(fname or '', '.')
      local ext = {}
      for n = 1, 2 do
        -- The first component is the base name, never an extension
        ext[n] = #parts > n and string.lower(parts[#parts + 1 - n]) or nil
      end
      return ext[1], ext[2], parts
    end

    -- Per-user extension tables; built on first use and then reused for all
    -- files of this task (they only depend on the task's cached settings)
    local extra_table, extra_archive_table

    -- Accepts either a plain list of extensions (each gets score 1.0) or an
    -- already keyed extension -> score table
    local function to_score_map(tbl)
      if tbl[1] then
        -- Convert to a key-value map
        return fun.tomap(fun.map(function(e) return e, 1.0 end, tbl))
      end
      return tbl
    end

    local function load_user_tables()
      if extra_table then return end
      extra_table, extra_archive_table = {}, {}

      local user_settings = task:cache_get('settings')
      if user_settings and user_settings.plugins then
        user_settings = user_settings.plugins.mime_types
      end
      if not user_settings then return end

      logger.infox(task, 'using special tables from user settings')
      if user_settings.bad_extensions then
        extra_table = to_score_map(user_settings.bad_extensions)
      end
      if user_settings.bad_archive_extensions then
        extra_archive_table = to_score_map(user_settings.bad_archive_extensions)
      end
    end

    --- Runs all filename based checks for one file.
    -- @param fname filename of the attachment or of a file inside an archive
    -- @param ct declared content type (nil for files inside archives)
    -- @param is_archive true when `fname` is a file stored inside an archive
    -- @param part the MIME part the file belongs to
    -- @param detected_ext extension detected from the content, if any
    -- @param nfiles number of files in the enclosing archive (1 otherwise)
    local function check_filename(fname, ct, is_archive, part, detected_ext, nfiles)
      local has_bad_unicode, char, ch_pos = rspamd_util.has_obscured_unicode(fname)
      if has_bad_unicode then
        task:insert_result(settings.symbol_bad_unicode, 1.0,
            string.format("0x%x after %s", char,
                fname:sub(1, ch_pos)))
      end

      -- Decode hex encoded characters
      fname = string.gsub(fname, '%%(%x%x)',
          function (hex) return string.char(tonumber(hex, 16)) end)

      -- Replace potentially bad characters with '?'
      fname = fname:gsub('[^%s%g]', '?')

      -- Check file is in filename whitelist
      if settings.filename_whitelist and
          settings.filename_whitelist:get_key(fname) then
        logger.debugm("mime_types", task, "skip checking of %s - file is in filename whitelist",
            fname)
        return
      end

      local ext, ext2, parts = gen_extension(fname)
      -- ext is the last extension, LOWERCASED
      -- ext2 is the one before last extension LOWERCASED

      local detected
      if not is_archive and detected_ext then
        detected = lua_magic_types[detected_ext]
      end

      -- `detected` is nil for files inside archives and for detected
      -- extensions missing from lua_magic_types, so it is tested directly
      if detected and ((not ext) or ext ~= detected_ext) then
        -- Try to find extension by real content type
        check_filename('detected.' .. detected_ext, detected.ct,
            false, part, nil, 1)
      end

      if not ext then return end

      -- Inserts the double-extension or bad-extension symbol for this file;
      -- the arguments are the scores of `ext` and `ext2` (nil when not listed)
      local function check_extension(badness_mult, badness_mult2)
        if not badness_mult and not badness_mult2 then return end
        if #parts > 2 then
          -- We need to ensure that next-to-last extension is an extension,
          -- so we check for its length and if it is not a number or date
          if #ext2 > 0 and #ext2 <= 4 and not string.match(ext2, '^%d+[%]%)]?$') then
            -- Use the greatest badness multiplier
            if not badness_mult or
                (badness_mult2 and badness_mult < badness_mult2) then
              badness_mult = badness_mult2
            end
            -- Double extension + bad extension == VERY bad
            task:insert_result(settings['symbol_double_extension'], badness_mult,
                string.format(".%s.%s", ext2, ext))
            insert_trace(part, '-')
            return
          end
        end
        if badness_mult then
          -- Just bad extension
          task:insert_result(settings['symbol_bad_extension'], badness_mult, ext)
          insert_trace(part, '-')
        end
      end

      -- Process settings
      load_user_tables()

      -- Looks up the score of extension `e`: user tables take precedence, and
      -- bad_archive_extensions only applies to archives holding a single file
      local function check_tables(e)
        if is_archive then
          return extra_archive_table[e] or (nfiles < 2 and settings.bad_archive_extensions[e]) or
              extra_table[e] or settings.bad_extensions[e]
        end
        return extra_table[e] or settings.bad_extensions[e]
      end

      local score1 = check_tables(ext)
      local score2
      if ext2 then
        score2 = check_tables(ext2)
      end
      check_extension(score1, score2)

      if is_archive then
        -- Also check for archive bad extension
        if settings['archive_extensions'][ext] then
          -- Archive in archive
          task:insert_result(settings['symbol_archive_in_archive'], 1.0, ext)
          insert_trace(part, '-')
        end
      elseif ext2 then
        -- Check for archive cloaking like .zip.gz
        if settings['archive_extensions'][ext2]
            -- Exclude multipart archive extensions, e.g. .zip.001
            and not string.match(ext, '^%d+$')
        then
          task:insert_result(settings['symbol_archive_in_archive'],
              1.0, string.format(".%s.%s", ext2, ext))
          insert_trace(part, '-')
        end
      end

      -- Compare the declared content type with the types expected for `ext`
      local mt = settings['extension_map'][ext]
      if mt and ct and ct ~= 'application/octet-stream' then
        local found
        local mult
        for _, v in ipairs(mt) do
          mult = v.mult
          if ct == v.ct then
            found = true
            break
          end
        end
        if not found then
          task:insert_result(settings['symbol_attachment'], mult, string.format('%s:%s',
              ext, ct))
        end
      end
    end

    local parts = task:get_parts()
    if parts then
      for _, p in ipairs(parts) do
        local mtype, subtype = p:get_type()
        if not mtype then
          lua_util.debugm(N, task, "no content type for part: %s", p:get_id())
          task:insert_result(settings['symbol_unknown'], 1.0, 'missing content type')
          insert_trace(p, '~')
        else
          -- Check for attachment
          local filename = p:get_filename()
          local ct = string.format('%s/%s', mtype, subtype):lower()
          local detected_ext = p:get_detected_ext()

          if filename then
            check_filename(filename, ct, false, p, detected_ext, 1)
          end

          if p:is_archive() then
            local check = true

            if detected_ext then
              local detected_type = lua_magic_types[detected_ext]
              -- An extension unknown to lua_magic_types cannot veto the check
              if detected_type and detected_type.type ~= 'archive' then
                logger.debugm("mime_types", task, "skip checking of %s as archive, %s is not archive but %s",
                    filename, detected_ext, detected_type.type)
                check = false
              end
            end

            if check and filename then
              local ext = gen_extension(filename)
              if ext and settings.archive_exceptions[ext] then
                check = false
                logger.debugm("mime_types", task, "skip checking of %s as archive, %s is whitelisted",
                    filename, ext)
              end
            end

            local arch = p:get_archive()

            -- TODO: migrate to flags once C part is ready
            if arch:is_encrypted() then
              task:insert_result(settings.symbol_encrypted_archive, 1.0, filename)
              insert_trace(p, '-')
            elseif arch:is_unreadable() then
              task:insert_result(settings.symbol_encrypted_archive, 0.5, {
                'compressed header',
                filename,
              })
              insert_trace(p, '-')
            elseif arch:is_obfuscated() then
              task:insert_result(settings.symbol_obfuscated_archive, 1.0, {
                'obfuscated archive',
                filename,
              })
              insert_trace(p, '-')
            end

            if check then
              -- A rar archive whose own name ends in three digits
              local is_gen_split_rar = false
              if filename then
                local ext = gen_extension(filename)
                is_gen_split_rar = ext and (string.match(ext, '^%d%d%d$')) and (arch:get_type() == 'rar')
              end

              local fl = arch:get_files_full(1000)
              local nfiles = #fl

              for _, f in ipairs(fl) do
                if f['encrypted'] then
                  task:insert_result(settings['symbol_encrypted_archive'],
                      1.0, f['name'])
                  insert_trace(p, '-')
                end

                if f['name'] then
                  if is_gen_split_rar and (gen_extension(f['name']) or '') == 'exe' then
                    task:insert_result(settings['symbol_exe_in_gen_split_rar'], 1.0, f['name'])
                  else
                    check_filename(f['name'], nil,
                        true, p, nil, nfiles)
                  end
                end
              end

              if nfiles == 1 and fl[1].name then
                -- We check that extension of the file inside archive is
                -- the same as double extension of the file
                local _, ext2 = gen_extension(filename)

                if ext2 and #ext2 > 0 then
                  local enc_ext = gen_extension(fl[1].name)

                  if enc_ext
                      and settings['bad_extensions'][enc_ext]
                      and not tonumber(ext2)
                      and enc_ext ~= ext2 then
                    task:insert_result(settings['symbol_double_extension'], 2.0,
                        string.format("%s!=%s", ext2, enc_ext))
                  end
                end
              end
            end
          end

          if map then
            local v = map:get_key(ct)
            local detected_different = false

            local detected_type
            if detected_ext then
              detected_type = lua_magic_types[detected_ext]
            end

            if detected_type and detected_type.ct ~= ct then
              local v_detected = map:get_key(detected_type.ct)
              if not v then
                v = v_detected
              elseif v_detected then
                -- Keep the higher of the two scores.  The values are compared
                -- as numbers, since comparing the raw map values orders them
                -- lexicographically ("10" < "9", "-1" < "-2")
                local cur, det = tonumber(v), tonumber(v_detected)
                if cur and det then
                  if det > cur then v = v_detected end
                elseif v_detected > v then
                  -- Non-numeric values: keep the previous raw ordering
                  v = v_detected
                end
              end
              detected_different = true
            end

            if v then
              local n = tonumber(v)

              if n then
                if n > 0 then
                  if detected_different then
                    -- Penalize case
                    n = n * 1.5
                    task:insert_result(settings['symbol_bad'], n,
                        string.format('%s:%s', ct, detected_type.ct))
                  else
                    task:insert_result(settings['symbol_bad'], n, ct)
                  end
                  insert_trace(p, '-')
                elseif n < 0 then
                  task:insert_result(settings['symbol_good'], -n, ct)
                  insert_trace(p, '+')
                else
                  -- Neutral content type
                  insert_trace(p, '~')
                end
              else
                logger.warnx(task, 'unknown value: "%s" for content type %s in the map',
                    v, ct)
              end
            else
              task:insert_result(settings['symbol_unknown'], 1.0, ct)
              insert_trace(p, '~')
            end
          end
        end
      end
    end
  end
  -- Module configuration: merge the 'mime_types' options into the defaults,
  -- normalise the extension map, load the maps and register the symbols.
  local opts = rspamd_config:get_all_opt('mime_types')
  if opts then
    -- Configured options overwrite the defaults key by key
    for key, value in pairs(opts) do
      settings[key] = value
    end

    settings.filename_whitelist = lua_maps.rspamd_map_add('mime_types', 'filename_whitelist', 'regexp',
        'filename whitelist')

    -- Stores extension_map[ext] as a list of { ct = ..., mult = ... }
    -- entries; `ct` is either one content type or a list of content types
    local function change_extension_map_entry(ext, ct, mult)
      local entries = {}
      if type(ct) == 'table' then
        for _, elt in ipairs(ct) do
          entries[#entries + 1] = { ct = elt, mult = mult }
        end
      else
        entries[1] = { ct = ct, mult = mult }
      end
      settings.extension_map[ext] = entries
    end

    -- Transform extension_map
    for ext, ct in pairs(settings.extension_map) do
      change_extension_map_entry(ext, ct, 1.0)
    end

    -- Add all extensions
    for _, pair in ipairs(lua_mime_types.full_extensions_map) do
      local ext, ct = pair[1], pair[2]
      if not settings.extension_map[ext] then
        change_extension_map_entry(ext, ct, settings.other_extensions_mult)
      end
    end

    local map_type = settings['regexp'] and 'regexp' or 'map'
    map = lua_maps.rspamd_map_add('mime_types', 'file', map_type,
        'mime types map')

    if not map then
      lua_util.disable_module(N, "config")
    else
      local id = rspamd_config:register_symbol({
        name = 'MIME_TYPES_CALLBACK',
        callback = check_mime_type,
        type = 'callback',
        flags = 'nostat',
        group = 'mime_types',
      })

      -- Virtual symbols attached to the callback, in registration order;
      -- each entry is { settings key, optional flags }
      local virtual_symbols = {
        { 'symbol_unknown' },
        { 'symbol_bad' },
        { 'symbol_good', 'nice' },
        { 'symbol_attachment' },
        { 'symbol_encrypted_archive' },
        { 'symbol_obfuscated_archive' },
        { 'symbol_exe_in_gen_split_rar' },
        { 'symbol_archive_in_archive' },
        { 'symbol_double_extension' },
        { 'symbol_bad_extension' },
        { 'symbol_bad_unicode' },
      }
      for _, sym in ipairs(virtual_symbols) do
        rspamd_config:register_symbol({
          type = 'virtual',
          name = settings[sym[1]],
          flags = sym[2],
          parent = id,
          group = 'mime_types',
        })
      end

      rspamd_config:register_symbol({
        type = 'virtual',
        name = 'MIME_TRACE',
        parent = id,
        group = 'mime_types',
        flags = 'nostat',
        score = 0,
      })
    end
  end