You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

mime.lua 28KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012
  1. --[[
  2. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. local argparse = require "argparse"
  14. local ansicolors = require "ansicolors"
  15. local rspamd_util = require "rspamd_util"
  16. local rspamd_task = require "rspamd_task"
  17. local rspamd_text = require "rspamd_text"
  18. local rspamd_logger = require "rspamd_logger"
  19. local lua_meta = require "lua_meta"
  20. local rspamd_url = require "rspamd_url"
  21. local lua_util = require "lua_util"
  22. local lua_mime = require "lua_mime"
  23. local ucl = require "ucl"
  -- Command line interface of `rspamadm mime`: the top-level parser and the
  -- options that are registered directly on it.
  local parser = argparse()
    :name("rspamadm mime")
    :description("Mime manipulations provided by Rspamd")
    :help_description_margin(30)
    :command_target("command")
    :require_command(true)

  parser:option("-c --config")
    :description("Path to config file")
    :argname("<cfg>")
    :default(rspamd_paths.CONFDIR .. "/" .. "rspamd.conf")

  -- The serialisation formats exclude each other
  do
    local as_json = parser:flag("-j --json"):description("JSON output")
    local as_ucl = parser:flag("-U --ucl"):description("UCL output")
    local as_msgpack = parser:flag("-M --messagepack"):description("MessagePack output")
    parser:mutex(as_json, as_ucl, as_msgpack)
  end

  parser:flag("-C --compact")
    :description("Use compact format")
  parser:flag("--no-file")
    :description("Do not print filename")
  -- Extract subcommand: selects which content (text, HTML, words, part and
  -- structure information) is pulled out of the given messages.
  local extract = parser:command("extract ex e")
    :description("Extracts data from MIME messages")
  extract:argument("file")
    :description("File to process")
    :argname("<file>")
    :args("+")
  extract:flag("-t --text")
    :description("Extracts plain text data from a message")
  extract:flag("-H --html")
    -- Help text previously read "htm data"
    :description("Extracts HTML data from a message")
  -- The converted value is what extract_handler hands to part:get_content()
  extract:option("-o --output")
    :description("Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')")
    :argname("<type>")
    :convert({
      raw = "raw",
      content = "content",
      oneline = "content_oneline",
      decoded = "raw_parsed",
      decoded_utf = "raw_utf"
    })
    :default("content")
  extract:flag("-w --words")
    :description("Extracts words")
  extract:flag("-p --part")
    :description("Show part info")
  extract:flag("-s --structure")
    :description("Show structure info (e.g. HTML tags)")
  extract:flag("-i --invisible")
    :description("Show invisible content for HTML parts")
  -- The converted value is what extract_handler hands to get_words() /
  -- get_meta_words()
  extract:option("-F --words-format")
    :description("Words format ('stem', 'norm', 'raw', 'full')")
    :argname("<type>")
    :convert({
      stem = "stem",
      norm = "norm",
      raw = "raw",
      full = "full",
    })
    :default("stem")
  -- Stat subcommand: exactly one kind of statistical data may be requested
  local stat = parser:command("stat st s")
    :description("Extracts statistical data from MIME messages")
  stat:argument("file")
    :description("File to process")
    :argname("<file>")
    :args("+")
  do
    local meta_flag = stat:flag("-m --meta"):description("Lua metatokens")
    local bayes_flag = stat:flag("-b --bayes"):description("Bayes tokens")
    local fuzzy_flag = stat:flag("-F --fuzzy"):description("Fuzzy hashes")
    stat:mutex(meta_flag, bayes_flag, fuzzy_flag)
  end
  stat:flag("-s --shingles")
    :description("Show shingles for fuzzy hashes")
  -- Urls subcommand: what to print for each URL plus post-processing flags
  local urls = parser:command("urls url u")
    :description("Extracts URLs from MIME messages")
  urls:argument("file")
    :description("File to process")
    :argname("<file>")
    :args("+")
  -- Only one representation of an URL can be selected at a time
  do
    local tld_flag = urls:flag("-t --tld"):description("Get TLDs only")
    local host_flag = urls:flag("-H --host"):description("Get hosts only")
    local full_flag = urls:flag("-f --full")
      :description("Show piecewise urls as processed by Rspamd")
    urls:mutex(tld_flag, host_flag, full_flag)
  end
  urls:flag("-u --unique")
    :description("Print only unique urls")
  urls:flag("-s --sort")
    :description("Sort output")
  urls:flag("--count")
    :description("Print count of each printed element")
  urls:flag("-r --reverse")
    :description("Reverse sort order")
  -- Modify subcommand: header manipulations (repeatable options) and footers
  local modify = parser:command("modify mod m")
    :description("Modifies MIME message")
  modify:argument("file")
    :description("File to process")
    :argname("<file>")
    :args("+")
  modify:option("-a --add-header")
    :description("Adds specific header")
    :argname("<header=value>")
    :count("*")
  modify:option("-r --remove-header")
    :description("Removes specific header (all occurrences)")
    :argname("<header>")
    :count("*")
  modify:option("-R --rewrite-header")
    :description("Rewrites specific header, uses Lua string.format pattern")
    :argname("<header=pattern>")
    :count("*")
  modify:option("-t --text-footer")
    :description("Adds footer to text/plain parts from a specific file")
    :argname("<file>")
  modify:option("-H --html-footer")
    :description("Adds footer to text/html parts from a specific file")
    :argname("<file>")
  -- Sign subcommand: domain, selector and key are each expected exactly once
  local sign = parser:command("sign")
    :description("Performs DKIM signing")
  sign:argument("file")
    :description("File to process")
    :argname("<file>")
    :args("+")
  sign:option("-d --domain")
    :description("Use specific domain")
    :argname("<domain>")
    :count("1")
  sign:option("-s --selector")
    :description("Use specific selector")
    :argname("<selector>")
    :count("1")
  -- sign_handler loads the value as a file when such a file exists and as
  -- a base64 encoded key otherwise
  sign:option("-k --key")
    :description("Use specific key or file")
    :argname("<key>")
    :count("1")
  -- The long alias needs its leading dashes: a bare `type` alias cannot be
  -- recognised as an option on the command line
  sign:option("-t --type")
    :description("ARC or DKIM signing")
    :argname("<arc|dkim>")
    :convert({
      ['arc'] = 'arc',
      ['dkim'] = 'dkim',
    })
    :default('dkim')
  sign:option("-o --output")
    :description("Output format")
    :argname("<message|signature>")
    :convert({
      ['message'] = 'message',
      ['signature'] = 'signature',
    })
    :default('message')
  -- Dump subcommand: serialises whole messages, optionally into a directory
  local dump = parser:command("dump")
    :description("Dumps a raw message in different formats")
  dump:argument("file")
    :description("File to process")
    :argname("<file>")
    :args("+")
  -- Duplicate format for convenience
  -- NOTE(review): the flags are created on `parser`, not on `dump`, while
  -- the mutex is registered on `dump` - confirm this is intended
  do
    local as_json = parser:flag("-j --json"):description("JSON output")
    local as_ucl = parser:flag("-U --ucl"):description("UCL output")
    local as_msgpack = parser:flag("-M --messagepack"):description("MessagePack output")
    dump:mutex(as_json, as_ucl, as_msgpack)
  end
  dump:flag("-s --split")
    :description("Split the output file contents such that no content is embedded")
  dump:option("-o --outdir")
    :description("Output directory")
    :argname("<directory>")
  --- Loads the configuration file named by `opts.config` into `rspamd_config`.
  -- Logs the error and terminates the process with exit code 1 when either
  -- the UCL loading or the subsequent RCL parsing step fails.
  -- @param opts parsed command line options
  local function load_config(opts)
    local cfg_path = opts['config']

    local ok, err = rspamd_config:load_ucl(cfg_path)
    if not ok then
      rspamd_logger.errx('cannot parse %s: %s', cfg_path, err)
      os.exit(1)
    end

    ok, err = rspamd_config:parse_rcl({ 'logging', 'worker' })
    if not ok then
      rspamd_logger.errx('cannot process %s: %s', cfg_path, err)
      os.exit(1)
    end
  end
  --- Creates a task from a message file and processes the message.
  -- Failures are reported through `parser:error`.
  -- @param opts parsed command line options (not used by this function)
  -- @param fname path of the message, '-' is used when it is missing
  -- @return the loaded task
  local function load_task(opts, fname)
    fname = fname or '-'

    local function fail(reason)
      parser:error(string.format('cannot read message from %s: %s', fname,
          reason))
    end

    local loaded, task = rspamd_task.load_from_file(fname, rspamd_config)
    if not loaded then
      -- On failure the second returned value is used as the reason
      fail(task)
    end

    if not task:process_message() then
      fail('failed to parse')
    end

    return task
  end
  --- Formats a message with `string.format` and wraps it into the
  -- `ansicolors.white` / `ansicolors.reset` pair.
  -- @param fmt format string
  -- @param ... values for the format string
  local function highlight(fmt, ...)
    local text = string.format(fmt, ...)
    return ansicolors.white .. text .. ansicolors.reset
  end
  --- Logs a highlighted `File: <name>` line unless JSON output or the
  -- `--no-file` flag is requested.
  -- @param opts parsed command line options
  -- @param fname file name to print
  local function maybe_print_fname(opts, fname)
    -- Multi-word options are stored with underscores elsewhere in this file
    -- (`text_footer`, `remove_header`, `words_format`), so `--no-file` is
    -- expected under `no_file`; the dashed key is still honoured for callers
    -- that build `opts` by hand
    local suppressed = opts.no_file or opts['no-file']

    if not opts.json and not suppressed then
      rspamd_logger.messagex(highlight('File: %s', fname))
    end
  end
  --- Selects the serialisation format name from the command line flags.
  -- Precedence (highest first): messagepack, ucl, compact JSON, plain JSON.
  -- @param opts parsed command line options
  -- @return 'msgpack', 'ucl', 'json-compact' or 'json'
  local function output_fmt(opts)
    if opts.messagepack then
      return 'msgpack'
    end
    if opts.ucl then
      return 'ucl'
    end
    if opts.compact then
      return 'json-compact'
    end
    return 'json'
  end
  --- Prints elements given in form `filename -> table of elements`.
  -- When a serialised format (JSON, UCL or MessagePack) is requested, the
  -- whole table is written through `ucl.to_format`. Otherwise every file is
  -- printed as an optional file name line followed by one element per line.
  -- @param elts table mapping file names to their elements
  -- @param opts parsed command line options
  -- @param func optional function mapped over the elements of each file
  --   before printing (text mode only)
  local function print_elts(elts, opts, func)
    local fun = require "fun"

    -- `output_fmt` also supports MessagePack, so this flag must select the
    -- serialised branch as well, otherwise it would be silently ignored
    if opts.json or opts.ucl or opts.messagepack then
      io.write(ucl.to_format(elts, output_fmt(opts)))
      return
    end

    fun.each(function(fname, elt)
      if func then
        elt = fun.map(func, elt)
      end
      maybe_print_fname(opts, fname)
      fun.each(function(e)
        io.write(e)
        io.write("\n")
      end, elt)
    end, elts)
  end
  --- Handler of the `extract` subcommand.
  -- Collects the requested data (content, words, part info, HTML structure,
  -- invisible content) for every file and prints it with `print_elts`.
  -- @param opts parsed command line options
  local function extract_handler(opts)
    local fun = require "fun"
    local results = {} -- file name -> list of output elements
    local loaded = {} -- tasks that stay alive until the output is printed
    local elt_formatter -- assigned only when HTML structure is collected

    if opts.words then
      -- Enable stemming and urls detection
      load_config(opts)
      rspamd_url.init(rspamd_config:get_tld_path())
      rspamd_config:init_subsystem('langdet')
    end

    -- Human readable decorations are emitted only without JSON/UCL output
    local plain_output = not opts.json and not opts.ucl
    local how_words = opts['words_format'] or 'stem'
    local full_words = how_words == 'full'

    -- Appends the description and the stats of a text part to `out`
    local function add_text_part_info(part, out)
      if not opts.part then
        return
      end

      local kind = 'plain text'
      if part:is_html() then
        kind = 'html'
      end
      if not plain_output then
        return
      end

      table.insert(out,
          rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s',
              part:get_mimepart():get_digest():sub(1, 8),
              kind,
              part:get_language(),
              part:get_length(), part:get_raw_length(),
              part:get_words_count()))

      -- Render the stats as a comma separated list of `key:value` pairs
      local stats = fun.foldl(function(acc, k, v)
        if acc == '' then
          return string.format('%s:%s', k, v)
        end
        return string.format('%s, %s:%s', acc, k, v)
      end, '', part:get_stats())
      table.insert(out, rspamd_logger.slog('Stats: %s', stats))
    end

    -- Appends the description of a mime part to `out`
    local function add_mime_part_info(part, out)
      if not (opts.part and plain_output) then
        return
      end

      local mtype, msubtype = part:get_type()
      local det_mtype, det_msubtype = part:get_detected_type()
      table.insert(out,
          rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s',
              part:get_digest():sub(1, 8),
              mtype, msubtype,
              det_mtype, det_msubtype,
              part:get_filename(),
              part:get_detected_ext(),
              part:get_length()))
    end

    -- Joins words with spaces; in the full mode every word is rendered as
    -- `raw|normalised|stemmed(flags)`
    local function print_words(words, full)
      if not full then
        return table.concat(words, ' ')
      end

      local rendered = fun.totable(fun.map(function(w)
        -- [1] - stemmed word
        -- [2] - normalised word
        -- [3] - raw word
        -- [4] - flags (table of strings)
        return string.format('%s|%s|%s(%s)',
            w[3], w[2], w[1], table.concat(w[4], ','))
      end, words))
      return table.concat(rendered, ' ')
    end

    -- Turns an output element into text: tables are rendered element by
    -- element (one per line), anything else is rendered directly
    local function format_structure_elt(elt)
      if type(elt) ~= 'table' then
        return rspamd_logger.slog("%s", elt)
      end

      local lines = fun.totable(fun.map(function(t)
        return rspamd_logger.slog("%s", t)
      end, elt))
      return table.concat(lines, '\n')
    end

    -- Builds a list describing every tag of the HTML content
    local function collect_tags(hc)
      local tags = {}
      hc:foreach_tag('any', function(tag)
        local extra = tag:get_extra()
        local elt = { tag = tag:get_type() }
        if extra then
          elt.extra = extra
        end
        local content = tag:get_content()
        if content then
          elt.content = tostring(content)
        end
        local style = tag:get_style()
        if style then
          elt.style = style
        end
        table.insert(tags, elt)
      end)
      return tags
    end

    for _, fname in ipairs(opts.file) do
      local task = load_task(opts, fname)
      local out = {}
      results[fname] = out

      -- Without an explicit selection both text and html are extracted
      if not opts.text and not opts.html then
        opts.text = true
        opts.html = true
      end

      if opts.words then
        table.insert(out, 'meta_words: ' ..
            print_words(task:get_meta_words(how_words), full_words))
      end

      if opts.text or opts.html then
        for _, mime_part in ipairs(task:get_parts() or {}) do
          local how = opts.output
          local part
          if mime_part:is_text() then
            part = mime_part:get_text()
          end

          if not part then
            -- Non-text parts are only described
            add_mime_part_info(mime_part, out)
          else
            local is_html = part:is_html()
            local wanted = (opts.text and not is_html) or (opts.html and is_html)

            if wanted then
              add_text_part_info(part, out)
              add_mime_part_info(mime_part, out)
              if plain_output then
                table.insert(out, '\n')
              end

              if opts.words then
                table.insert(out, print_words(part:get_words(how_words),
                    full_words))
              elseif not is_html then
                table.insert(out, tostring(part:get_content(how)))
              else
                if opts.structure then
                  elt_formatter = format_structure_elt
                  table.insert(out, collect_tags(part:get_html()))
                else
                  table.insert(out, tostring(part:get_content(how)))
                end

                if opts.invisible then
                  local hc = part:get_html()
                  table.insert(out, string.format('invisible content: %s',
                      tostring(hc:get_invisible())))
                end
              end
            end
          end
        end
      end

      table.insert(out, "")
      table.insert(loaded, task)
    end

    print_elts(results, opts, elt_formatter)

    -- To avoid use after free we postpone tasks destruction
    for _, task in ipairs(loaded) do
      task:destroy()
    end
  end
  --- Handler of the `stat` subcommand.
  -- Depending on the selected flag collects Lua metatokens, Bayes tokens or
  -- fuzzy hashes for every file and prints them with `print_elts`.
  -- @param opts parsed command line options
  local function stat_handler(opts)
    local fun = require "fun"
    local results = {}
    local formatter

    load_config(opts)
    rspamd_url.init(rspamd_config:get_tld_path())
    rspamd_config:init_subsystem('langdet,stat') -- Needed to gen stat tokens

    -- Text renderers for the individual kinds of elements
    local function format_metatoken(k, v)
      return string.format("%s = %s", k, v)
    end

    local function format_bayes_token(e)
      local flags = fun.totable(fun.map(function(k)
        return k
      end, e.flags))
      return string.format('%s (%d): "%s"+"%s", [%s]', e.data, e.win, e.t1 or "",
          e.t2 or "", table.concat(flags, ","))
    end

    local function format_fuzzy_part(e)
      local lines = { string.format('part: %s(%s): %s', e.type, e.file or "", e.digest) }
      if opts.shingles and e.shingles then
        local sgl = {}
        for _, s in ipairs(e.shingles) do
          sgl[#sgl + 1] = string.format('%s: %s+%s+%s', s[1], s[2], s[3], s[4])
        end
        lines[2] = table.concat(sgl, '\n')
      end
      return table.concat(lines, '\n')
    end

    for _, fname in ipairs(opts.file) do
      local task = load_task(opts, fname)
      results[fname] = {}

      if opts.meta then
        results[fname] = lua_meta.gen_metatokens_table(task)
        formatter = format_metatoken
      elseif opts.bayes then
        results[fname] = task:get_stat_tokens()
        formatter = format_bayes_token
      elseif opts.fuzzy then
        local hashes = {}
        results[fname] = hashes
        formatter = format_fuzzy_part

        for _, part in ipairs(task:get_parts() or {}) do
          if not part:is_multipart() then
            local entry
            local text = part:get_text()
            if text then
              -- Text parts provide a digest together with shingles
              local digest, shingles = text:get_fuzzy_hashes(task:get_mempool())
              entry = { digest = digest, shingles = shingles }
            else
              -- Other parts are identified by their digest and file name
              entry = { digest = part:get_digest(), file = part:get_filename() }
            end
            local mtype, msubtype = part:get_type()
            entry.type = string.format('%s/%s', mtype, msubtype)
            hashes[#hashes + 1] = entry
          end
        end
      end

      task:destroy() -- No automatic dtor
    end

    print_elts(results, opts, formatter)
  end
  --- Handler of the `urls` subcommand.
  -- Collects the URLs of every file (optionally reduced to TLDs or hosts,
  -- deduplicated, counted and sorted) and prints them with `print_elts`.
  -- @param opts parsed command line options
  local function urls_handler(opts)
    load_config(opts)
    rspamd_url.init(rspamd_config:get_tld_path())
    local out_elts = {}

    -- No '[' is logged in JSON mode: it was never closed, and `print_elts`
    -- already serialises the complete result as a single document

    for _, fname in ipairs(opts.file) do
      out_elts[fname] = {}
      local task = load_task(opts, fname)
      -- With --unique: string -> { count, url }; otherwise a plain list
      local elts = {}

      local function process_url(u)
        local s
        if opts.tld then
          s = u:get_tld()
        elseif opts.host then
          s = u:get_host()
        elseif opts.full then
          s = rspamd_logger.slog('%s: %s', u:get_text(), u:to_table())
        else
          s = u:get_text()
        end

        if opts.unique then
          if elts[s] then
            elts[s].count = elts[s].count + 1
          else
            elts[s] = {
              count = 1,
              url = u:to_table()
            }
          end
        else
          if opts.json then
            table.insert(elts, u)
          else
            table.insert(elts, s)
          end
        end
      end

      -- Tolerate a missing url list in the same way `get_parts()` results
      -- are guarded elsewhere in this file
      for _, u in ipairs(task:get_urls(true) or {}) do
        process_url(u)
      end

      local json_elts = {}

      local function process_elt(s, u)
        if opts.unique then
          -- s is string, u is {url = url, count = count }
          if not opts.json then
            if opts.count then
              table.insert(json_elts, string.format('%s : %s', s, u.count))
            else
              table.insert(json_elts, s)
            end
          else
            local tb = u.url
            tb.count = u.count
            table.insert(json_elts, tb)
          end
        else
          -- s is index, u is url (JSON output) or string: stored as is
          table.insert(json_elts, u)
        end
      end

      if opts.sort then
        local sfunc
        if opts.unique then
          -- Order by count first and by the string itself on ties
          sfunc = function(t, a, b)
            if t[a].count ~= t[b].count then
              if opts.reverse then
                return t[a].count > t[b].count
              else
                return t[a].count < t[b].count
              end
            else
              -- Sort lexicographically
              if opts.reverse then
                return a > b
              else
                return a < b
              end
            end
          end
        else
          sfunc = function(t, a, b)
            local va, vb
            if opts.json then
              -- The list holds url objects here, compare their text
              va = t[a]:get_text()
              vb = t[b]:get_text()
            else
              va = t[a]
              vb = t[b]
            end
            if opts.reverse then
              return va > vb
            else
              return va < vb
            end
          end
        end

        for s, u in lua_util.spairs(elts, sfunc) do
          process_elt(s, u)
        end
      else
        for s, u in pairs(elts) do
          process_elt(s, u)
        end
      end

      out_elts[fname] = json_elts
      task:destroy() -- No automatic dtor
    end

    print_elts(out_elts, opts)
  end
  --- Returns the line terminator matching the newline type of a task.
  -- @param task object providing `get_newlines_type()`
  -- @return '\r' for 'cr', '\n' for 'lf' and '\r\n' for anything else
  local function newline(task)
    local terminators = { cr = '\r', lf = '\n' }
    return terminators[task:get_newlines_type()] or '\r\n'
  end
  --- Handler of the `modify` subcommand.
  -- For every file: removes, rewrites and adds headers, optionally appends
  -- text/html footers (via `lua_mime.add_text_footer`) and writes the
  -- resulting message to the standard output.
  -- @param opts parsed command line options
  local function modify_handler(opts)
    load_config(opts)
    rspamd_url.init(rspamd_config:get_tld_path())

    -- Reads a whole file; raises an error when it cannot be opened
    local function read_file(file)
      local f = assert(io.open(file, "rb"))
      local content = f:read("*all")
      f:close()
      return content
    end

    local text_footer, html_footer

    if opts['text_footer'] then
      text_footer = read_file(opts['text_footer'])
    end

    if opts['html_footer'] then
      html_footer = read_file(opts['html_footer'])
    end

    for _, fname in ipairs(opts.file) do
      local task = load_task(opts, fname)
      local newline_s = newline(task)
      local seen_cte
      local rewrite = lua_mime.add_text_footer(task, html_footer, text_footer) or {}
      local out = {} -- Start with headers

      local function process_headers_cb(name, hdr)
        -- NOTE(review): `h` is used as an unanchored, case sensitive Lua
        -- pattern, so e.g. `To` also matches `Reply-To` - confirm intent
        for _, h in ipairs(opts['remove_header']) do
          if name:match(h) then
            return
          end
        end

        for _, h in ipairs(opts['rewrite_header']) do
          local hname, hpattern = h:match('^([^=]+)=(.+)$')

          if hname == name then
            -- The user pattern receives the decoded header value
            local new_value = string.format(hpattern, hdr.decoded)
            new_value = string.format('%s:%s%s',
                name, hdr.separator,
                rspamd_util.fold_header(name,
                    rspamd_util.mime_header_encode(new_value),
                    task:get_newlines_type()))
            out[#out + 1] = new_value
            return
          end
        end

        if rewrite.need_rewrite_ct then
          if name:lower() == 'content-type' then
            local nct = string.format('%s: %s/%s; charset=utf-8',
                'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype)
            out[#out + 1] = nct
            return
          elseif name:lower() == 'content-transfer-encoding' then
            out[#out + 1] = string.format('%s: %s',
                'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
            seen_cte = true
            return
          end
        end

        -- Untouched header: strip the trailing line terminator only (the
        -- assignment keeps just the first value returned by gsub)
        out[#out + 1] = hdr.raw:gsub('\r?\n?$', '')
      end

      task:headers_foreach(process_headers_cb, { full = true })

      -- Arguments that do not have the `header=value` form are skipped
      for _, h in ipairs(opts['add_header']) do
        local hname, hvalue = h:match('^([^=]+)=(.+)$')

        if hname and hvalue then
          out[#out + 1] = string.format('%s: %s', hname,
              rspamd_util.fold_header(hname, hvalue, task:get_newlines_type()))
        end
      end

      if not seen_cte and rewrite.need_rewrite_ct then
        out[#out + 1] = string.format('%s: %s',
            'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
      end

      -- End of headers
      out[#out + 1] = ''

      if rewrite.out then
        for _, o in ipairs(rewrite.out) do
          out[#out + 1] = o
        end
      else
        out[#out + 1] = { task:get_rawbody(), false }
      end

      -- Elements are strings (always followed by a newline), `{ data, flag }`
      -- pairs (newline only when the flag is set) or other objects that are
      -- saved directly to the file descriptor 1
      for _, o in ipairs(out) do
        if type(o) == 'string' then
          io.write(o)
          io.write(newline_s)
        elseif type(o) == 'table' then
          io.flush()
          if type(o[1]) == 'string' then
            io.write(o[1])
          else
            o[1]:save_in_file(1)
          end

          if o[2] then
            io.write(newline_s)
          end
        else
          -- Flush the buffered output first (as the table branch does), so
          -- that the direct descriptor write cannot overtake it
          io.flush()
          o:save_in_file(1)
          io.write(newline_s)
        end
      end

      task:destroy() -- No automatic dtor
    end
  end
  --- Handler of the `sign` subcommand.
  -- Signs every file with the given key, selector and domain and prints
  -- either the bare signature or the message prefixed with a
  -- `DKIM-Signature` header. Terminates the process with exit code 1 when
  -- FFI support is missing or when a key/signature cannot be produced.
  -- @param opts parsed command line options
  local function sign_handler(opts)
    load_config(opts)
    rspamd_url.init(rspamd_config:get_tld_path())

    local lua_dkim = require("lua_ffi").dkim

    if not lua_dkim then
      -- Terminated with a newline like the other diagnostics below
      io.stderr:write('FFI support is required: please use LuaJIT or install lua-ffi\n')
      os.exit(1)
    end

    -- The key option is either a path to a key file or the key itself
    local sign_key
    if rspamd_util.file_exists(opts.key) then
      sign_key = lua_dkim.load_sign_key(opts.key, 'file')
    else
      sign_key = lua_dkim.load_sign_key(opts.key, 'base64')
    end

    if not sign_key then
      io.stderr:write('Cannot load key: ' .. opts.key .. '\n')
      os.exit(1)
    end

    for _, fname in ipairs(opts.file) do
      local task = load_task(opts, fname)
      -- NOTE(review): no option of the `sign` command in this file declares
      -- `algorithm`, and the `-t` signing type is not consulted here -
      -- confirm the intended behaviour
      local ctx = lua_dkim.create_sign_context(task, sign_key, nil, opts.algorithm)

      if not ctx then
        io.stderr:write('Cannot init signing\n')
        os.exit(1)
      end

      local sig = lua_dkim.do_sign(task, ctx, opts.selector, opts.domain)

      if not sig then
        io.stderr:write('Cannot create signature\n')
        os.exit(1)
      end

      if opts.output == 'signature' then
        io.write(sig)
        io.write('\n')
        io.flush()
      else
        local dkim_hdr = string.format('%s: %s%s',
            'DKIM-Signature',
            rspamd_util.fold_header('DKIM-Signature',
                rspamd_util.mime_header_encode(sig),
                task:get_newlines_type()),
            newline(task))
        io.write(dkim_hdr)
        -- Flush before the message body is written directly to descriptor 1
        io.flush()
        task:get_content():save_in_file(1)
      end

      task:destroy() -- No automatic dtor
    end
  end
  --- Strips directories and .extensions (if present) from a filepath.
  -- The last path component is isolated first, so a component starting with
  -- a dot (e.g. "/home/.hidden.eml") can no longer make the match fall back
  -- to a directory name such as "home/".
  -- @param filepath path to a file
  -- @return the file name up to its first dot (leading dots are skipped);
  --   nil when the path has no character other than a dot
  local function filename_only(filepath)
    -- Everything after the last slash (the whole path when there is none)
    local basename = filepath:match("([^/]*)$")
    local filename = basename:match("([^%.]+)")

    if not filename then
      -- Nothing usable after the last slash: keep the former fallback
      filename = filepath:match("([^%.]+)")
    end

    return filename
  end

  -- Load time self checks
  assert(filename_only("very_simple") == "very_simple")
  assert(filename_only("/home/very_simple.eml") == "very_simple")
  assert(filename_only("very_simple.eml") == "very_simple")
  assert(filename_only("very_simple.example.eml") == "very_simple")
  assert(filename_only("/home/very_simple") == "very_simple")
  assert(filename_only("home/very_simple") == "very_simple")
  assert(filename_only("./home/very_simple") == "very_simple")
  assert(filename_only("../home/very_simple.eml") == "very_simple")
  assert(filename_only("/home/dir.with.dots/very_simple.eml") == "very_simple")
  assert(filename_only("/home/.hidden.eml") == "hidden")
  --- Writes the dump content to a file or to the standard output.
  -- With `outdir` the content goes to `<outdir>/<name>.<extension>` (an
  -- existing file is removed first) and the written path is printed;
  -- without it the content is saved to the file descriptor 1.
  -- @param dump_content string or text object to write
  -- @param fname source file name, reduced with `filename_only`
  -- @param extension extension of the output file
  -- @param outdir optional output directory
  -- @return path of the written file, nil when nothing was written to a file
  local function write_dump_content(dump_content, fname, extension, outdir)
    if type(dump_content) == "string" then
      dump_content = rspamd_text.fromstring(dump_content)
    end

    if not outdir then
      dump_content:save_in_file(1)
      return nil
    end

    -- Make sure that the directory ends with exactly one separator
    local dir = outdir
    if dir:sub(-1) ~= "/" then
      dir = dir .. "/"
    end

    local target = string.format("%s%s.%s", dir, filename_only(fname), extension)
    if rspamd_util.file_exists(target) then
      os.remove(target)
    end

    if not dump_content:save_in_file(target) then
      io.stderr:write(string.format("Unable to save dump content to file: %s\n", target))
      return nil
    end

    io.write(target .. "\n")
    return target
  end
  --- Gets the formatted ucl (split or unsplit) or the raw task content.
  -- @param task task to dump
  -- @param opts parsed command line options
  -- @param fname source file name, used to name split out parts
  -- @return the content to write and the extension describing its format
  local function get_dump_content(task, opts, fname)
    local structured = opts.ucl or opts.json or opts.messagepack
    if not structured then
      return task:get_content(), "mime"
    end

    local message = lua_mime.message_to_ucl(task)

    if opts.split then
      -- Move the content of every part into a separate raw file and keep
      -- just the path of that file in the object
      for idx, part in ipairs(message.parts) do
        if part.content then
          local part_name = string.format("%s-part%d", filename_only(fname), idx)
          local saved_to = write_dump_content(part.content, part_name, "raw", opts.outdir)
          -- Content is kept inline when no file has been written
          if saved_to then
            part.content = ucl.null
            part.content_path = saved_to
          end
        end
      end
    end

    local extension = output_fmt(opts)
    return ucl.to_format(message, extension), extension
  end
  --- Handler of the `dump` subcommand: dumps every file in the requested
  -- format to the output directory or to the standard output.
  -- @param opts parsed command line options
  local function dump_handler(opts)
    load_config(opts)
    rspamd_url.init(rspamd_config:get_tld_path())

    for _, path in ipairs(opts.file) do
      local task = load_task(opts, path)
      local content, ext = get_dump_content(task, opts, path)
      write_dump_content(content, path, ext, opts.outdir)
      task:destroy() -- No automatic dtor
    end
  end
  --- Entry point of the tool: parses the arguments and runs the handler of
  -- the selected subcommand.
  -- @param args list of command line arguments
  local function handler(args)
    local opts = parser:parse(args)
    local command = opts.command

    -- Handlers always iterate over a list of files
    if type(opts.file) == 'string' then
      opts.file = { opts.file }
    elseif opts.file == nil then
      -- `type()` never returns 'none', so a missing value is tested directly
      opts.file = {}
    end

    local handlers = {
      extract = extract_handler,
      stat = stat_handler,
      urls = urls_handler,
      modify = modify_handler,
      sign = sign_handler,
      dump = dump_handler,
    }

    local command_handler = handlers[command]
    if command_handler then
      command_handler(opts)
    else
      -- The message is formatted here, as done for the other
      -- `parser:error` calls in this file
      parser:error(string.format('command %s is not implemented',
          tostring(command)))
    end
  end

  return {
    name = 'mime',
    aliases = { 'mime_tool' },
    handler = handler,
    description = parser._description
  }