You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_selectors.lua 19KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707
  1. --[[
  2. Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. -- This module contains 'selectors' implementation: code to extract data
  14. -- from Rspamd tasks and compose those together
  15. --[[[
  16. -- @module lua_selectors
  17. -- This module contains 'selectors' implementation: code to extract data
  18. -- from Rspamd tasks and compose those together.
  19. -- Typical selector looks like this: header(User).lower.substring(1, 2):ip
  20. --]]
  21. local exports = {}
  22. local logger = require 'rspamd_logger'
  23. local fun = require 'fun'
  24. local lua_util = require "lua_util"
  25. local M = "lua_selectors"
  26. local E = {}
  27. local extractors = {
  28. -- Get source IP address
  29. ['ip'] = {
  30. ['type'] = 'ip',
  31. ['get_value'] = function(task)
  32. local ip = task:get_ip()
  33. if ip and ip:is_valid() then return tostring(ip) end
  34. return nil
  35. end,
  36. ['description'] = 'Get source IP address',
  37. },
  38. -- Get SMTP from
  39. ['smtp_from'] = {
  40. ['type'] = 'email',
  41. ['get_value'] = function(task)
  42. local from = task:get_from(0)
  43. if ((from or E)[1] or E).addr then
  44. return from[1]
  45. end
  46. return nil
  47. end,
  48. ['description'] = 'Get SMTP from',
  49. },
  50. -- Get MIME from
  51. ['mime_from'] = {
  52. ['type'] = 'email',
  53. ['get_value'] = function(task)
  54. local from = task:get_from(0)
  55. if ((from or E)[1] or E).addr then
  56. return from[1]
  57. end
  58. return nil
  59. end,
  60. ['description'] = 'Get MIME from',
  61. },
  62. -- Get country (ASN module must be executed first)
  63. ['country'] = {
  64. ['type'] = 'string',
  65. ['get_value'] = function(task)
  66. local asn = task:get_mempool():get_variable('asn')
  67. if not asn then
  68. return nil
  69. else
  70. return asn
  71. end
  72. end,
  73. ['description'] = 'Get country (ASN module must be executed first)',
  74. },
  75. -- Get ASN number
  76. ['asn'] = {
  77. ['type'] = 'string',
  78. ['get_value'] = function(task)
  79. local asn = task:get_mempool():get_variable('asn')
  80. if not asn then
  81. return nil
  82. else
  83. return asn
  84. end
  85. end,
  86. ['description'] = 'Get ASN number',
  87. },
  88. -- Get authenticated username
  89. ['user'] = {
  90. ['type'] = 'string',
  91. ['get_value'] = function(task)
  92. local auser = task:get_user()
  93. if not auser then
  94. return nil
  95. else
  96. return auser
  97. end
  98. end,
  99. ['description'] = 'Get authenticated username',
  100. },
  101. -- Get principal recipient
  102. ['to'] = {
  103. ['type'] = 'email',
  104. ['get_value'] = function(task)
  105. return task:get_principal_recipient()
  106. end,
  107. ['description'] = 'Get principal recipient',
  108. },
  109. -- Get content digest
  110. ['digest'] = {
  111. ['type'] = 'string',
  112. ['get_value'] = function(task)
  113. return task:get_digest()
  114. end,
  115. ['description'] = 'Get content digest',
  116. },
  117. -- Get list of all attachments digests
  118. ['attachments'] = {
  119. ['type'] = 'string_list',
  120. ['get_value'] = function(task)
  121. local parts = task:get_parts() or E
  122. local digests = {}
  123. for _,p in ipairs(parts) do
  124. if p:get_filename() then
  125. table.insert(digests, p:get_digest())
  126. end
  127. end
  128. if #digests > 0 then
  129. return digests
  130. end
  131. return nil
  132. end,
  133. ['description'] = 'Get list of all attachments digests',
  134. },
  135. -- Get all attachments files
  136. ['files'] = {
  137. ['type'] = 'string_list',
  138. ['get_value'] = function(task)
  139. local parts = task:get_parts() or E
  140. local files = {}
  141. for _,p in ipairs(parts) do
  142. local fname = p:get_filename()
  143. if fname then
  144. table.insert(files, fname)
  145. end
  146. end
  147. if #files > 0 then
  148. return files
  149. end
  150. return nil
  151. end,
  152. ['description'] = 'Get all attachments files',
  153. },
  154. -- Get helo value
  155. ['helo'] = {
  156. ['type'] = 'string',
  157. ['get_value'] = function(task)
  158. return task:get_helo()
  159. end,
  160. ['description'] = 'Get helo value',
  161. },
  162. -- Get header with the name that is expected as an argument. Returns list of
  163. -- headers with this name
  164. ['header'] = {
  165. ['type'] = 'kv_list',
  166. ['get_value'] = function(task, args)
  167. return task:get_header_full(args[1])
  168. end,
  169. ['description'] = 'Get header with the name that is expected as an argument. Returns list of headers with this name',
  170. },
  171. -- Get list of received headers (returns list of tables)
  172. ['received'] = {
  173. ['type'] = 'kv_list',
  174. ['get_value'] = function(task)
  175. return task:get_received_headers()
  176. end,
  177. ['description'] = 'Get list of received headers (returns list of tables)',
  178. },
  179. -- Get all urls
  180. ['urls'] = {
  181. ['type'] = 'url_list',
  182. ['get_value'] = function(task)
  183. return task:get_urls()
  184. end,
  185. ['description'] = 'Get all urls',
  186. },
  187. -- Get all emails
  188. ['emails'] = {
  189. ['type'] = 'url_list',
  190. ['get_value'] = function(task)
  191. return task:get_emails()
  192. end,
  193. ['description'] = 'Get all emails',
  194. },
  195. -- Get specific pool var. The first argument must be variable name,
  196. -- the second argument is optional and defines the type (string by default)
  197. ['pool_var'] = {
  198. ['type'] = 'string',
  199. ['get_value'] = function(task, args)
  200. return task:get_mempool():get_variable(args[1], args[2])
  201. end,
  202. ['description'] = [[Get specific pool var. The first argument must be variable name,
  203. the second argument is optional and defines the type (string by default)]],
  204. },
  205. -- Get specific HTTP request header. The first argument must be header name.
  206. ['request_header'] = {
  207. ['type'] = 'string',
  208. ['get_value'] = function(task, args)
  209. local hdr = task:get_request_header(args[1])
  210. if hdr then
  211. return tostring(hdr)
  212. end
  213. return nil
  214. end,
  215. ['description'] = 'Get specific HTTP request header. The first argument must be header name.',
  216. },
  217. -- Get task date, optionally formatted
  218. ['time'] = {
  219. ['type'] = 'string',
  220. ['get_value'] = function(task, args)
  221. local what = args[1] or 'message'
  222. local dt = task:get_date{format = what, gmt = true}
  223. if dt then
  224. if args[2] then
  225. -- Should be in format !xxx, as dt is in GMT
  226. return os.date(args[2], dt)
  227. end
  228. return tostring(dt)
  229. end
  230. return nil
  231. end,
  232. ['description'] = 'Get task date, optionally formatted (see os.date)',
  233. }
  234. }
  235. local function pure_type(ltype)
  236. return ltype:match('^(.*)_list$')
  237. end
  238. local transform_function = {
  239. -- Get hostname from url or a list of urls
  240. ['get_host'] = {
  241. ['types'] = {
  242. ['url'] = true
  243. },
  244. ['map_type'] = 'string',
  245. ['process'] = function(inp, t)
  246. return inp:get_host(),'string'
  247. end,
  248. ['description'] = 'Get hostname from url or a list of urls',
  249. },
  250. -- Get tld from url or a list of urls
  251. ['get_tld'] = {
  252. ['types'] = {
  253. ['url'] = true
  254. },
  255. ['map_type'] = 'string',
  256. ['process'] = function(inp, t)
  257. return inp:get_tld()
  258. end,
  259. ['description'] = 'Get tld from url or a list of urls',
  260. },
  261. -- Get address
  262. ['get_addr'] = {
  263. ['types'] = {
  264. ['email'] = true
  265. },
  266. ['map_type'] = 'string',
  267. ['process'] = function(inp, _)
  268. return inp:get_addr()
  269. end,
  270. ['description'] = 'Get email address as a string',
  271. },
  272. -- Returns the lowercased string
  273. ['lower'] = {
  274. ['types'] = {
  275. ['string'] = true,
  276. },
  277. ['map_type'] = 'string',
  278. ['process'] = function(inp, _)
  279. return inp:lower(),'string'
  280. end,
  281. ['description'] = 'Returns the lowercased string',
  282. },
  283. -- Returns the first element
  284. ['first'] = {
  285. ['types'] = {
  286. ['url_list'] = true,
  287. ['kv_list'] = true,
  288. ['received_list'] = true,
  289. ['string_list'] = true
  290. },
  291. ['process'] = function(inp, t)
  292. return inp[1],pure_type(t)
  293. end,
  294. ['description'] = 'Returns the first element',
  295. },
  296. -- Returns the last element
  297. ['last'] = {
  298. ['types'] = {
  299. ['url_list'] = true,
  300. ['kv_list'] = true,
  301. ['string_list'] = true
  302. },
  303. ['process'] = function(inp, t)
  304. return inp[#inp],pure_type(t)
  305. end,
  306. ['description'] = 'Returns the last element',
  307. },
  308. -- Returns the nth element
  309. ['nth'] = {
  310. ['types'] = {
  311. ['url_list'] = true,
  312. ['kv_list'] = true,
  313. ['string_list'] = true
  314. },
  315. ['process'] = function(inp, t, args)
  316. return inp[tonumber(args[1] or 1)],pure_type(t)
  317. end,
  318. ['description'] = 'Returns the nth element',
  319. },
  320. -- Joins strings into a single string using separator in the argument
  321. ['join'] = {
  322. ['types'] = {
  323. ['string_list'] = true
  324. },
  325. ['process'] = function(inp, _, args)
  326. return table.concat(inp, args[1] or ''), 'string'
  327. end,
  328. ['description'] = 'Joins strings into a single string using separator in the argument',
  329. },
  330. -- Create a digest from string or a list of strings
  331. ['digest'] = {
  332. ['types'] = {
  333. ['string'] = true
  334. },
  335. ['map_type'] = 'hash',
  336. ['process'] = function(inp, _, args)
  337. local hash = require 'rspamd_cryptobox_hash'
  338. local ht = args[1] or 'blake2'
  339. return hash:create_specific(ht):update(inp), 'hash'
  340. end,
  341. ['description'] = 'Create a digest from string or a list of strings',
  342. },
  343. -- Encode hash to string (using hex encoding by default)
  344. ['encode'] = {
  345. ['types'] = {
  346. ['hash'] = true
  347. },
  348. ['map_type'] = 'string',
  349. ['process'] = function(inp, _, args)
  350. local how = args[1] or 'hex'
  351. if how == 'hex' then
  352. return inp:hex()
  353. elseif how == 'base32' then
  354. return inp:base32()
  355. elseif how == 'base64' then
  356. return inp:base64()
  357. end
  358. end,
  359. ['description'] = 'Encode hash to string (using hex encoding by default)',
  360. },
  361. -- Extracts substring
  362. ['substring'] = {
  363. ['types'] = {
  364. ['string'] = true
  365. },
  366. ['map_type'] = 'string',
  367. ['process'] = function(inp, _, args)
  368. local start_pos = args[1] or 1
  369. local end_pos = args[2] or -1
  370. return inp:sub(start_pos, end_pos), 'string'
  371. end,
  372. ['description'] = 'Extracts substring',
  373. },
  374. -- Drops input value and return values from function's arguments or an empty string
  375. ['id'] = {
  376. ['types'] = {
  377. ['string'] = true
  378. },
  379. ['map_type'] = 'string',
  380. ['process'] = function(_, _, args)
  381. if args[1] and args[2] then
  382. return fun.map(tostring, args)
  383. elseif args[1] then
  384. return args[1]
  385. end
  386. return ''
  387. end,
  388. ['description'] = 'Drops input value and return values from function\'s arguments or an empty string',
  389. },
  390. -- Extracts table value from key-value list
  391. ['elt'] = {
  392. ['types'] = {
  393. ['kv'] = true,
  394. },
  395. ['map_type'] = 'string',
  396. ['process'] = function(inp, t, args)
  397. return inp[args[1]],'string'
  398. end,
  399. ['description'] = 'Extracts table value from key-value list',
  400. },
  401. -- Call specific userdata method
  402. ['method'] = {
  403. ['types'] = {
  404. ['email'] = true,
  405. ['url'] = true,
  406. ['ip'] = true,
  407. },
  408. ['map_type'] = 'string',
  409. ['process'] = function(inp, _, args)
  410. return inp[args[1]](inp)
  411. end,
  412. ['description'] = 'Call specific userdata method',
  413. },
  414. -- Boolean function in, returns either nil or its input if input is in args list
  415. ['in'] = {
  416. ['types'] = {
  417. ['string'] = true,
  418. },
  419. ['map_type'] = 'string',
  420. ['process'] = function(inp, t, args)
  421. for _,a in ipairs(args) do if a == inp then return inp,t end end
  422. return nil
  423. end,
  424. ['description'] = 'Boolean function in, returns either nil or its input if input is in args list',
  425. },
  426. ['not_in'] = {
  427. ['types'] = {
  428. ['string'] = true,
  429. },
  430. ['map_type'] = 'string',
  431. ['process'] = function(inp, t, args)
  432. for _,a in ipairs(args) do if a == inp then return nil end end
  433. return inp,t
  434. end,
  435. ['description'] = 'Boolean function in, returns either nil or its input if input is not in args list',
  436. },
  437. }
  438. local function process_selector(task, sel)
  439. local input = sel.selector.get_value(task, sel.selector.args)
  440. if not input then return nil end
  441. -- Now we fold elements using left fold
  442. local function fold_function(acc, x)
  443. if acc == nil or acc[1] == nil then return nil end
  444. local value = acc[1]
  445. local t = acc[2]
  446. if not x.types[t] then
  447. -- Additional case for map
  448. local pt = pure_type(t, '^(.*)_list$')
  449. if pt and x.map_type and x.types[pt] then
  450. return {fun.map(function(list_elt)
  451. local ret, _ = x.process(list_elt, pt, x.args)
  452. return ret
  453. end, value), x.map_type}
  454. end
  455. logger.errx(task, 'cannot apply transform %s for type %s', x.name, t)
  456. return nil
  457. end
  458. return {x.process(value, t, x.args)}
  459. end
  460. local res = fun.foldl(fold_function,
  461. {input, sel.selector.type},
  462. sel.processor_pipe)
  463. if not res or not res[1] then return nil end -- Pipeline failed
  464. if not (res[2] == 'string' or res[2] == 'string_list') then
  465. logger.errx(task, 'transform pipeline has returned bad type: %s, string expected: res = %s, sel: %s',
  466. res[2], res, sel)
  467. return nil
  468. end
  469. if res[2] == 'string_list' then
  470. -- Convert to table as it might have a functional form
  471. return fun.totable(res[1])
  472. end
  473. return res[1]
  474. end
  475. local function make_grammar()
  476. local l = require "lpeg"
  477. local spc = l.S(" \t\n")^0
  478. local atom = l.C((l.R("az") + l.R("AZ") + l.R("09") + l.S("_-"))^1)
  479. local singlequoted_string = l.P "'" * l.C(((1 - l.S "'\r\n\f\\") + (l.P'\\' * 1))^0) * "'"
  480. local doublequoted_string = l.P '"' * l.C(((1 - l.S'"\r\n\f\\') + (l.P'\\' * 1))^0) * '"'
  481. local argument = atom + singlequoted_string + doublequoted_string
  482. local dot = l.P(".")
  483. local obrace = "(" * spc
  484. local ebrace = spc * ")"
  485. local comma = spc * "," * spc
  486. local sel_separator = l.S":;"
  487. return l.P{
  488. "LIST";
  489. LIST = l.Ct(l.V("EXPR")) * (sel_separator * l.Ct(l.V("EXPR")))^0,
  490. EXPR = l.V("FUNCTION") * (dot * l.V("PROCESSOR"))^0,
  491. PROCESSOR = l.Ct(atom * spc * (obrace * l.V("ARG_LIST") * ebrace)^0),
  492. FUNCTION = l.Ct(atom * spc * (obrace * l.V("ARG_LIST") * ebrace)^0),
  493. ARG_LIST = l.Ct((argument * comma^0)^0)
  494. }
  495. end
  496. local parser = make_grammar()
  497. --[[[
  498. -- @function lua_selectors.parse_selectors(cfg, str)
  499. --]]
  500. exports.parse_selector = function(cfg, str)
  501. local parsed = {parser:match(str)}
  502. local output = {}
  503. if not parsed then return nil end
  504. -- Output AST format is the following:
  505. -- table of individual selectors
  506. -- each selector: list of functions
  507. -- each function: function name + optional list of arguments
  508. for _,sel in ipairs(parsed) do
  509. local res = {
  510. selector = {},
  511. processor_pipe = {},
  512. }
  513. local selector_tbl = sel[1]
  514. if not selector_tbl then
  515. logger.errx(cfg, 'no selector represented')
  516. return nil
  517. end
  518. if not extractors[selector_tbl[1]] then
  519. logger.errx(cfg, 'selector %s is unknown', selector_tbl[1])
  520. return nil
  521. end
  522. res.selector = lua_util.shallowcopy(extractors[selector_tbl[1]])
  523. res.selector.name = selector_tbl[1]
  524. res.selector.args = selector_tbl[2] or {}
  525. lua_util.debugm(M, cfg, 'processed selector %s, args: %s',
  526. res.selector.name, res.selector.arg)
  527. -- Now process processors pipe
  528. fun.each(function(proc_tbl)
  529. local proc_name = proc_tbl[1]
  530. if not transform_function[proc_name] then
  531. logger.errx(cfg, 'processor %s is unknown', proc_name)
  532. return nil
  533. end
  534. local processor = lua_util.shallowcopy(transform_function[proc_name])
  535. processor.name = proc_name
  536. processor.args = proc_tbl[2]
  537. lua_util.debugm(M, cfg, 'attached processor %s to selector %s, args: %s',
  538. proc_name, res.selector.name, processor.args)
  539. table.insert(res.processor_pipe, processor)
  540. end, fun.tail(sel))
  541. table.insert(output, res)
  542. end
  543. return output
  544. end
  545. --[[[
  546. -- @function lua_selectors.register_selector(cfg, name, selector)
  547. --]]
  548. exports.register_selector = function(cfg, name, selector)
  549. if selector.get_value and selector.type then
  550. if extractors[name] then
  551. logger.warnx(cfg, 'redefining selector %s', name)
  552. end
  553. extractors[name] = selector
  554. return true
  555. end
  556. logger.errx(cfg, 'bad selector %s', name)
  557. return false
  558. end
  559. --[[[
  560. -- @function lua_selectors.register_transform(cfg, name, transform)
  561. --]]
  562. exports.register_transform = function(cfg, name, transform)
  563. if transform.process and transform.types then
  564. if transform_function[name] then
  565. logger.warnx(cfg, 'redefining transform function %s', name)
  566. end
  567. transform_function[name] = transform
  568. return true
  569. end
  570. logger.errx(cfg, 'bad transform function %s', name)
  571. return false
  572. end
  573. --[[[
  574. -- @function lua_selectors.process_selectors(task, selectors_pipe)
  575. --]]
  576. exports.process_selectors = function(task, selectors_pipe)
  577. local ret = {}
  578. for _,sel in ipairs(selectors_pipe) do
  579. local r = process_selector(task, sel)
  580. -- If any element is nil, then the whole selector is nil
  581. if not r then return nil end
  582. table.insert(ret, r)
  583. end
  584. return ret
  585. end
  586. --[[[
  587. -- @function lua_selectors.combine_selectors(task, selectors, delimiter)
  588. --]]
  589. exports.combine_selectors = function(_, selectors, delimiter)
  590. if not delimiter then delimiter = '' end
  591. if not selectors then return nil end
  592. local all_strings = fun.all(function(s) return type(s) == 'string' end, selectors)
  593. if all_strings then
  594. return table.concat(selectors, delimiter)
  595. else
  596. -- We need to do a spill on each table selector
  597. -- e.g. s:tbl:s -> s:telt1:s + s:telt2:s ...
  598. local prefix = {}
  599. local tbl = {}
  600. local suffix = {}
  601. local res = {}
  602. local in_prefix = true
  603. for _,s in ipairs(selectors) do
  604. if in_prefix then
  605. if type(s) == 'string' then
  606. table.insert(prefix, s)
  607. else
  608. in_prefix = false
  609. table.insert(tbl, s)
  610. end
  611. else
  612. if type(s) == 'string' then
  613. table.insert(suffix, s)
  614. else
  615. table.insert(tbl, s)
  616. end
  617. end
  618. end
  619. prefix = table.concat(prefix, delimiter)
  620. suffix = table.concat(suffix, delimiter)
  621. for _,t in ipairs(tbl) do
  622. fun.each(function(...)
  623. table.insert(res, table.concat({...}, delimiter))
  624. end, fun.zip(fun.duplicate(prefix), t, fun.duplicate(suffix)))
  625. end
  626. return res
  627. end
  628. end
  629. local function display_selectors(tbl)
  630. return fun.tomap(fun.map(function(k,v)
  631. return k, fun.tomap(fun.filter(function(kk, vv)
  632. return type(vv) ~= 'function'
  633. end, v))
  634. end, tbl))
  635. end
  636. exports.list_extractors = function()
  637. return display_selectors(extractors)
  638. end
  639. exports.list_transforms = function()
  640. return display_selectors(transform_function)
  641. end
  642. return exports