You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lua_selectors.lua 30KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081
  1. --[[
  2. Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. -- This module contains 'selectors' implementation: code to extract data
  14. -- from Rspamd tasks and compose those together
  15. --
  16. -- Read more at https://rspamd.com/doc/configuration/selectors.html
  17. --[[[
  18. -- @module lua_selectors
  19. -- This module contains 'selectors' implementation: code to extract data
  20. -- from Rspamd tasks and compose those together.
  21. -- Typical selector looks like this: header(User).lower.substring(1, 2):ip
  22. --]]
  23. local exports = {}
  24. local logger = require 'rspamd_logger'
  25. local fun = require 'fun'
  26. local lua_util = require "lua_util"
  27. local ts = require("tableshape").types
  28. local M = "selectors"
  29. local E = {}
  -- Registry of extractors: the functions that start a selector expression and
  -- pull raw data out of a task. Every entry is a table with:
  --   * get_value(task, args): returns `value, type_name`; a nil value means
  --     that nothing has been extracted
  --   * description: help text for the extractor
  --   * args_schema (optional): list of tableshape checkers, matched to `args`
  --     by position when a selector is parsed
  local extractors = {
    -- Return the first argument as is (or an empty string when it is absent)
    ['id'] = {
      ['get_value'] = function(_, args)
        if args[1] then
          return args[1], 'string'
        end

        return '','string'
      end,
      ['description'] = [[Return value from function's argument or an empty string,
For example, `id('Something')` returns a string 'Something']],
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Get source IP address
    ['ip'] = {
      ['get_value'] = function(task)
        local ip = task:get_ip()
        if ip and ip:is_valid() then return ip,'userdata' end
        return nil
      end,
      ['description'] = [[Get source IP address]],
    },
    -- Get MIME from
    ['from'] = {
      ['get_value'] = function(task, args)
        local from = task:get_from(args[1] or 0)
        -- `E` stands in for missing tables so that the chain never indexes nil
        if ((from or E)[1] or E).addr then
          return from[1],'table'
        end
        return nil
      end,
      ['description'] = [[Get MIME or SMTP from (e.g. `from('smtp')` or `from('mime')`,
uses any type by default)]],
    },
    -- Get recipients (the whole list is returned)
    ['rcpts'] = {
      ['get_value'] = function(task, args)
        local rcpts = task:get_recipients(args[1] or 0)
        if ((rcpts or E)[1] or E).addr then
          return rcpts,'table_list'
        end
        return nil
      end,
      ['description'] = [[Get MIME or SMTP rcpts (e.g. `rcpts('smtp')` or `rcpts('mime')`,
uses any type by default)]],
    },
    -- Get country (ASN module must be executed first)
    ['country'] = {
      ['get_value'] = function(task)
        local country = task:get_mempool():get_variable('country')
        if not country then
          return nil
        else
          return country,'string'
        end
      end,
      ['description'] = [[Get country (ASN module must be executed first)]],
    },
    -- Get ASN number
    ['asn'] = {
      ['type'] = 'string',
      ['get_value'] = function(task)
        local asn = task:get_mempool():get_variable('asn')
        if not asn then
          return nil
        else
          return asn,'string'
        end
      end,
      ['description'] = [[Get AS number (ASN module must be executed first)]],
    },
    -- Get authenticated username
    ['user'] = {
      ['get_value'] = function(task)
        local auser = task:get_user()
        if not auser then
          return nil
        else
          return auser,'string'
        end
      end,
      ['description'] = 'Get authenticated user name',
    },
    -- Get principal recipient
    ['to'] = {
      ['get_value'] = function(task)
        return task:get_principal_recipient(),'string'
      end,
      ['description'] = 'Get principal recipient',
    },
    -- Get content digest
    ['digest'] = {
      ['get_value'] = function(task)
        return task:get_digest(),'string'
      end,
      ['description'] = 'Get content digest',
    },
    -- Get list of all attachments digests
    ['attachments'] = {
      ['get_value'] = function(task, args)
        local parts = task:get_parts() or E
        local digests = {}

        if #args > 0 then
          -- Explicit encoding/hash requested: hash the content of each part
          local rspamd_cryptobox = require "rspamd_cryptobox_hash"
          local encoding = args[1] or 'hex'
          local ht = args[2] or 'blake2'

          for _,p in ipairs(parts) do
            if p:get_filename() then
              local h = rspamd_cryptobox.create_specific(ht, p:get_content('raw_parsed'))
              local s
              if encoding == 'hex' then
                s = h:hex()
              elseif encoding == 'base32' then
                s = h:base32()
              elseif encoding == 'base64' then
                s = h:base64()
              end
              -- `s` stays nil only for an encoding outside of args_schema;
              -- inserting nil appends nothing
              table.insert(digests, s)
            end
          end
        else
          -- No arguments: use the digest provided by the part itself
          for _,p in ipairs(parts) do
            if p:get_filename() then
              table.insert(digests, p:get_digest())
            end
          end
        end

        if #digests > 0 then
          return digests,'string_list'
        end

        return nil
      end,
      ['description'] = [[Get list of all attachments digests.
The first optional argument is encoding (`hex`, `base32`, `base64`),
the second optional argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
      ['args_schema'] = {ts.one_of{'hex', 'base32', 'base64'}:is_optional(),
                         ts.one_of{'blake2', 'sha256', 'sha1', 'sha512', 'md5'}:is_optional()}
    },
    -- Get all attachments files
    ['files'] = {
      ['get_value'] = function(task)
        local parts = task:get_parts() or E
        local files = {}

        for _,p in ipairs(parts) do
          local fname = p:get_filename()
          if fname then
            table.insert(files, fname)
          end
        end

        if #files > 0 then
          return files,'string_list'
        end

        return nil
      end,
      ['description'] = 'Get all attachments files',
    },
    -- Get languages for text parts
    ['languages'] = {
      ['get_value'] = function(task)
        local text_parts = task:get_text_parts() or E
        local languages = {}

        for _,p in ipairs(text_parts) do
          local lang = p:get_language()
          if lang then
            table.insert(languages, lang)
          end
        end

        if #languages > 0 then
          return languages,'string_list'
        end

        return nil
      end,
      ['description'] = 'Get languages for text parts',
    },
    -- Get helo value
    ['helo'] = {
      ['get_value'] = function(task)
        return task:get_helo(),'string'
      end,
      ['description'] = 'Get helo value',
    },
    -- Get header with the name that is expected as an argument. Returns list of
    -- headers with this name
    ['header'] = {
      ['get_value'] = function(task, args)
        local strong = false
        if args[2] then
          -- The flags argument is matched as a pattern, so it may carry both
          -- `strong` and `full` at once
          if args[2]:match('strong') then
            strong = true
          end

          if args[2]:match('full') then
            return task:get_header_full(args[1], strong),'table_list'
          end

          return task:get_header(args[1], strong),'string'
        else
          return task:get_header(args[1]),'string'
        end
      end,
      ['description'] = [[Get header with the name that is expected as an argument.
The optional second argument accepts list of flags:
- `full`: returns all headers with this name with all data (like task:get_header_full())
- `strong`: use case sensitive match when matching header's name]],
      ['args_schema'] = {ts.string,
                         (ts.pattern("strong") + ts.pattern("full")):is_optional()}
    },
    -- Get list of received headers (returns list of tables)
    ['received'] = {
      ['get_value'] = function(task, args)
        local rh = task:get_received_headers()
        if args[1] and rh then
          -- Project a single field out of every received header
          return fun.map(function(r) return r[args[1]] end, rh), 'string_list'
        end

        return rh,'table_list'
      end,
      ['description'] = [[Get list of received headers.
If no arguments specified, returns list of tables. Otherwise, selects a specific element,
e.g. `by_hostname`]],
    },
    -- Get all urls
    ['urls'] = {
      ['get_value'] = function(task, args)
        local urls = task:get_urls()
        if args[1] and urls then
          -- args[1] is a method name that is invoked on every url object
          return fun.map(function(r) return r[args[1]](r) end, urls), 'string_list'
        end
        return urls,'userdata_list'
      end,
      ['description'] = [[Get list of all urls.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_tld`]],
    },
    -- Get all emails
    ['emails'] = {
      ['get_value'] = function(task, args)
        local urls = task:get_emails()
        if args[1] and urls then
          -- args[1] is a method name that is invoked on every email object
          return fun.map(function(r) return r[args[1]](r) end, urls), 'string_list'
        end
        return urls,'userdata_list'
      end,
      ['description'] = [[Get list of all emails.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_user`]],
    },
    -- Get specific pool var. The first argument must be variable name,
    -- the second argument is optional and defines the type (string by default)
    ['pool_var'] = {
      ['get_value'] = function(task, args)
        -- Named `var_type` so that the builtin `type` function is not shadowed
        local var_type = args[2] or 'string'

        return task:get_mempool():get_variable(args[1], var_type),(var_type)
      end,
      ['description'] = [[Get specific pool var. The first argument must be variable name,
the second argument is optional and defines the type (string by default)]],
      ['args_schema'] = {ts.string, ts.string:is_optional()}
    },
    -- Get specific HTTP request header. The first argument must be header name.
    ['request_header'] = {
      ['get_value'] = function(task, args)
        local hdr = task:get_request_header(args[1])
        if hdr then
          return tostring(hdr),'string'
        end

        return nil
      end,
      ['description'] = [[Get specific HTTP request header.
The first argument must be header name.]],
      ['args_schema'] = {ts.string}
    },
    -- Get task date, optionally formatted
    ['time'] = {
      ['get_value'] = function(task, args)
        -- `message` is the default source of the timestamp
        local what = args[1] or 'message'
        local dt = task:get_date{format = what, gmt = true}

        if dt then
          if args[2] then
            -- Should be in format !xxx, as dt is in GMT
            return os.date(args[2], dt),'string'
          end

          return tostring(dt),'string'
        end

        return nil
      end,
      ['description'] = [[Get task timestamp. The first argument is type:
- `connect`: connection timestamp
- `message`: timestamp as defined by `Date` header (default)
The second argument is optional time format, see [os.date](http://pgl.yoyo.org/luai/i/os.date) description]],
      ['args_schema'] = {ts.one_of{'connect', 'message'}:is_optional(),
                         ts.string:is_optional()}
    }
  }
  -- Returns the element type of a list type name, i.e. the name without its
  -- trailing `_list` suffix (`string_list` -> `string`).
  -- Returns nil when `ltype` does not end with `_list`.
  local function pure_type(ltype)
    local element_type = ltype:match('^(.*)_list$')

    return element_type
  end
  -- Registry of transform functions: the `.name(args)` stages of a selector.
  -- Every entry is a table with:
  --   * types: set of accepted input types; the pseudo type `list` accepts any
  --     `*_list` input as a whole
  --   * map_type (optional): when set, the function may be mapped over a list
  --     of accepted elements and yields `<map_type>_list`
  --   * process(inp, t, args): returns `value, type_name` (nil stops the pipe)
  --   * description: help text
  --   * args_schema (optional): tableshape checker(s) for the arguments
  local transform_function = {
    -- Returns the lowercased string
    ['lower'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _)
        return inp:lower(),'string'
      end,
      ['description'] = 'Returns the lowercased string',
    },
    -- Returns the first element
    ['first'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t)
        return fun.head(inp),pure_type(t)
      end,
      ['description'] = 'Returns the first element',
    },
    -- Returns the last element
    ['last'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t)
        -- The input may be a functional iterator (e.g. the result of a mapped
        -- stage), where the `#` operator does not give the number of elements
        local len = fun.length(inp)
        if len == 0 then
          -- Nothing to select; nth() does not accept a zero index
          return nil
        end

        return fun.nth(len, inp),pure_type(t)
      end,
      ['description'] = 'Returns the last element',
    },
    -- Returns the nth element
    ['nth'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.nth(args[1] or 1, inp),pure_type(t)
      end,
      ['description'] = 'Returns the nth element',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Returns the n first elements
    ['take_n'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.take_n(args[1] or 1, inp),t
      end,
      ['description'] = 'Returns the n first elements',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Returns list without the first n elements
    ['drop_n'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.drop_n(args[1] or 1, inp),t
      end,
      ['description'] = 'Returns list without the first n elements',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Joins strings into a single string using separator in the argument
    ['join'] = {
      ['types'] = {
        ['string_list'] = true
      },
      ['process'] = function(inp, _, args)
        return table.concat(fun.totable(inp), args[1] or ''), 'string'
      end,
      ['description'] = 'Joins strings into a single string using separator in the argument',
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Sort strings
    ['sort'] = {
      ['types'] = {
        ['list'] = true
      },
      ['process'] = function(inp, t, _)
        -- Materialise first: table.sort only works on a plain array, whilst
        -- the input may be a functional iterator produced by a previous stage
        local sorted = fun.totable(inp)
        table.sort(sorted)

        return sorted, t
      end,
      ['description'] = 'Sort strings lexicographically',
    },
    -- Return unique elements based on hashing (can work without sorting)
    ['uniq'] = {
      ['types'] = {
        ['list'] = true
      },
      ['process'] = function(inp, t, _)
        local tmp = {}
        fun.each(function(val)
          tmp[val] = true
        end, inp)

        -- Keys of the temporary set are the unique elements
        return fun.map(function(k, _) return k end, tmp), t
      end,
      ['description'] = 'Returns a list of unique elements (using a hash table)',
    },
    -- Create a digest from string or a list of strings
    ['digest'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'hash',
      ['process'] = function(inp, _, args)
        local hash = require 'rspamd_cryptobox_hash'
        local encoding = args[1] or 'hex'
        local ht = args[2] or 'blake2'
        -- `create_specific` is a plain module function: it must be called with
        -- a dot, otherwise the module table is passed as the hash type
        local h = hash.create_specific(ht):update(inp)
        local s

        if encoding == 'hex' then
          s = h:hex()
        elseif encoding == 'base32' then
          s = h:base32()
        elseif encoding == 'base64' then
          s = h:base64()
        end

        return s,'string'
      end,
      ['description'] = [[Create a digest from a string.
The first argument is encoding (`hex`, `base32`, `base64`),
the second argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
      ['args_schema'] = {ts.one_of{'hex', 'base32', 'base64'}:is_optional(),
                         ts.one_of{'blake2', 'sha256', 'sha1', 'sha512', 'md5'}:is_optional()}
    },
    -- Extracts substring
    ['substring'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local start_pos = args[1] or 1
        local end_pos = args[2] or -1

        return inp:sub(start_pos, end_pos), 'string'
      end,
      ['description'] = 'Extracts substring; the first argument is start, the second is the last (like in Lua)',
      ['args_schema'] = {(ts.number + ts.string / tonumber):is_optional(),
                         (ts.number + ts.string / tonumber):is_optional()}
    },
    -- Regexp matching
    ['regexp'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local rspamd_regexp = require "rspamd_regexp"

        local re = rspamd_regexp.create_cached(args[1])

        if not re then
          logger.errx('invalid regexp: %s', args[1])
          return nil
        end

        local res = re:search(inp, false, true)

        if res then
          -- A single result is returned as a scalar, several ones as a list
          if #res == 1 then
            return res[1],'string'
          end

          return res,'string_list'
        end

        return nil
      end,
      ['description'] = 'Regexp matching',
      ['args_schema'] = {ts.string}
    },
    -- Drops input value and return values from function's arguments or an empty string
    ['id'] = {
      ['types'] = {
        ['string'] = true,
        ['list'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(_, _, args)
        if args[1] and args[2] then
          return fun.map(tostring, args),'string_list'
        elseif args[1] then
          return args[1],'string'
        end

        return '','string'
      end,
      ['description'] = 'Drops input value and return values from function\'s arguments or an empty string',
      ['args_schema'] = (ts.string + ts.array_of(ts.string)):is_optional()
    },
    -- Boolean function equal, returns either nil or its input if it is equal to the argument
    ['equal'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        if inp == args[1] then
          return inp,'string'
        end

        return nil
      end,
      ['description'] = [[Boolean function equal.
Returns either nil or its argument if input is equal to argument]],
      ['args_schema'] = {ts.string}
    },
    -- Boolean function in, returns either nil or its input if input is in args list
    ['in'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, t, args)
        for _,a in ipairs(args) do if a == inp then return inp,t end end
        return nil
      end,
      ['description'] = [[Boolean function in.
Returns either nil or its input if input is in args list]],
      ['args_schema'] = ts.array_of(ts.string)
    },
    -- Boolean function not in, returns either nil or its input if input is not in args list
    ['not_in'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, t, args)
        for _,a in ipairs(args) do if a == inp then return nil end end
        return inp,t
      end,
      ['description'] = [[Boolean function not in.
Returns either nil or its input if input is not in args list]],
      ['args_schema'] = ts.array_of(ts.string)
    },
    -- Inverses input: empty input gives a value, non-empty input gives nil
    ['inverse'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        -- An empty string is a true value in Lua, hence it has to be compared
        -- explicitly to behave as described
        if inp and inp ~= '' then
          return nil
        else
          return (args[1] or 'true'),'string'
        end
      end,
      ['description'] = [[Inverses input.
Empty string comes the first argument or 'true', non-empty string comes nil]],
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Applies mask to IP address
    ['ipmask'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local rspamd_ip = require "rspamd_ip"
        -- Non optimal: convert string to an IP address
        local ip = rspamd_ip.from_string(inp)

        if not ip or not ip:is_valid() then
          lua_util.debugm(M, "cannot convert %s to IP", inp)
          return nil
        end

        local mask
        if ip:get_version() == 4 then
          mask = tonumber(args[1])
        else
          -- IPv6 takes the second argument or the first one...
          mask = tonumber(args[2] or args[1])
        end

        return ip:apply_mask(mask):to_string(),'string'
      end,
      ['description'] = 'Applies mask to IP address.' ..
        ' The first argument is the mask for IPv4 addresses, the second is the mask for IPv6 addresses.',
      ['args_schema'] = {(ts.number + ts.string / tonumber),
                         (ts.number + ts.string / tonumber):is_optional()}
    },
  }

  -- `match` is an alias for `regexp`
  transform_function.match = transform_function.regexp
  -- Evaluates one parsed selector against a task.
  -- `sel.selector` is the extractor (with `get_value`, `args` and `name`) and
  -- `sel.processor_pipe` is the optional list of stages to apply afterwards.
  -- Returns the final value (a string/text or a plain table of them) or nil
  -- when nothing was extracted or some stage of the pipeline produced nothing.
  local function process_selector(task, sel)
    -- Types that can be returned without any further conversion
    local directly_usable = {
      ['string'] = true,
      ['text'] = true,
      ['string_list'] = true,
      ['text_list'] = true,
    }

    local function allowed_type(t)
      return directly_usable[t] == true
    end

    -- Converts a table or an userdata object to a string
    local function implicit_tostring(t, ud_or_table)
      if t ~= 'table' then
        return tostring(ud_or_table),'string'
      end

      -- Table (very special): prefer well known fields, dump it otherwise
      local known_field = ud_or_table.value or ud_or_table.addr
      if known_field then
        return known_field,'string'
      end

      return logger.slog("%s", ud_or_table),'string'
    end

    local extractor = sel.selector
    local input,etype = extractor.get_value(task, extractor.args)

    if not input then
      lua_util.debugm(M, task, 'no value extracted for %s', extractor.name)
      return nil
    end

    lua_util.debugm(M, task, 'extracted %s, type %s',
        extractor.name, etype)

    local pipe = sel.processor_pipe or E

    if etype:match('^userdata') or etype:match('^table') then
      -- Userdata and tables have to be converted before the generic pipeline
      local meth = pipe[1]

      if meth and meth.method then
        -- Explicit conversion requested by `:method` syntax
        if meth.types[etype] then
          lua_util.debugm(M, task, 'apply method `%s` to %s',
              meth.name, etype)
          input,etype = meth.process(input, etype)
        else
          local elt_type = pure_type(etype)

          if meth.types[elt_type] then
            lua_util.debugm(M, task, 'map method `%s` to list of %s',
                meth.name, elt_type)
            -- Only the value is kept, the type returned by the method is dropped
            input = fun.map(function(list_elt)
              local ret, _ = meth.process(list_elt, elt_type)
              return ret
            end, input)
            etype = 'string_list'
          end
        end

        -- The method has been consumed: remove it from the pipeline
        pipe = fun.drop_n(1, pipe)
      else
        -- Implicit conversion
        local elt_type = pure_type(etype)

        if elt_type then
          lua_util.debugm(M, task, 'apply implicit map %s->string', elt_type)
          input = fun.map(function(list_elt)
            local ret = implicit_tostring(elt_type, list_elt)
            return ret
          end, input)
          etype = 'string_list'
        else
          lua_util.debugm(M, task, 'apply implicit conversion %s->string', etype)
          input = implicit_tostring(etype, input)
          etype = 'string'
        end
      end
    end

    -- Left fold step: `acc` is a `{value, type}` pair (or nil after a failure),
    -- `x` is the next stage of the pipeline
    local function fold_function(acc, x)
      if acc == nil or acc[1] == nil then
        lua_util.debugm(M, task, 'do not apply %s, accumulator is nil', x.name)
        return nil
      end

      local value, t = acc[1], acc[2]

      if x.types[t] then
        -- The stage accepts this type directly
        lua_util.debugm(M, task, 'apply %s to %s', x.name, t)
        return {x.process(value, t, x.args)}
      end

      local elt_type = pure_type(t)

      if elt_type and x.types['list'] then
        -- Generic list processor
        lua_util.debugm(M, task, 'apply list function `%s` to %s', x.name, t)
        return {x.process(value, t, x.args)}
      end

      if elt_type and x.map_type and x.types[elt_type] then
        -- Scalar processor mapped over each element of the list
        local map_type = x.map_type .. '_list'
        lua_util.debugm(M, task, 'map `%s` to list of %s resulting %s',
            x.name, elt_type, map_type)

        return {fun.map(function(list_elt)
          if not list_elt then return nil end
          local ret, _ = x.process(list_elt, elt_type, x.args)
          return ret
        end, value), map_type}
      end

      logger.errx(task, 'cannot apply transform %s for type %s', x.name, t)
      return nil
    end

    local res = fun.foldl(fold_function,
        {input, etype},
        pipe)

    if not res or not res[1] then return nil end -- Pipeline failed

    if not allowed_type(res[2]) then
      -- Search for implicit conversion
      local elt_type = pure_type(res[2])

      if elt_type then
        lua_util.debugm(M, task, 'apply implicit map %s->string_list', elt_type)
        res[1] = fun.map(function(e) return implicit_tostring(elt_type, e) end, res[1])
        res[2] = 'string_list'
      else
        res[1] = implicit_tostring(res[2], res[1])
        res[2] = 'string'
      end
    end

    if pure_type(res[2]) then
      -- Convert to table as it might have a functional form
      res[1] = fun.totable(res[1])
    end

    lua_util.debugm(M, task, 'final selector type: %s, value: %s', res[2], res[1])

    return res[1]
  end
  -- Builds the LPEG grammar used to parse selector strings such as
  -- `header(User).lower.substring(1, 2)`.
  -- A match produces one capture table per selector; each of them holds one
  -- table per function in the form `{name, {arguments}}`. Names of methods
  -- (introduced by `:`) are prefixed with `__` to tell them from processors.
  local function make_grammar()
    local l = require "lpeg"
    local P, S, R, C, Ct, V = l.P, l.S, l.R, l.C, l.Ct, l.V

    -- Lexical elements
    local spc = S(" \t\n")^0
    local atom = C((R("az") + R("AZ") + R("09") + S("_-"))^1)
    -- Quoted strings are captured without the quotes; a backslash escapes the
    -- character that follows it
    local singlequoted_string = P("'") * C(((1 - S("'\r\n\f\\")) + (P('\\') * 1))^0) * "'"
    local doublequoted_string = P('"') * C(((1 - S('"\r\n\f\\')) + (P('\\') * 1))^0) * '"'
    local argument = atom + singlequoted_string + doublequoted_string
    local dot = P(".")
    local colon = P(":")
    local obrace = "(" * spc
    local ebrace = spc * ")"
    local comma = spc * "," * spc
    local sel_separator = spc * S(";*") * spc

    -- Optional parenthesised list of arguments shared by all kinds of calls
    local call_args = (obrace * V("ARG_LIST") * ebrace)^0

    local function as_method(e)
      return '__' .. e
    end

    return P{
      "LIST";
      LIST = Ct(V("EXPR")) * (sel_separator * Ct(V("EXPR")))^0,
      EXPR = V("FUNCTION") * (colon * V("METHOD"))^-1 * (dot * V("PROCESSOR"))^0,
      PROCESSOR = Ct(atom * spc * call_args),
      FUNCTION = Ct(atom * spc * call_args),
      METHOD = Ct(atom / as_method * spc * call_args),
      ARG_LIST = Ct((argument * comma^0)^0)
    }
  end

  -- The grammar is compiled once and shared by all parse_selector calls
  local parser = make_grammar()
  --[[[
  -- @function lua_selectors.parse_selector(cfg, str)
  -- Parses a selector string and resolves it against the registered
  -- extractors and transform functions.
  -- @param {rspamd_config} cfg config object, used for logging
  -- @param {string} str selector expression(s)
  -- @return {table|nil} list of parsed selectors, each one is a table with
  -- `selector` (copy of the extractor with `name` and `args` set) and
  -- `processor_pipe` (list of processors); nil if the string cannot be parsed,
  -- refers to an unknown function or has invalid arguments
  --]]
  exports.parse_selector = function(cfg, str)
    local parsed = {parser:match(str)}
    local output = {}

    if not parsed or not parsed[1] then return nil end

    -- Validates `args` against `schema` and stores transformed values back
    -- into `args`. Returns false (and logs an error) on a mismatch.
    local function check_args(name, schema, args)
      if schema then
        if getmetatable(schema) then
          -- Schema covers all arguments
          local res,err = schema:transform(args)
          if not res then
            logger.errx(cfg, 'invalid arguments for %s: %s', name, err)
            return false
          else
            for i,elt in ipairs(res) do
              args[i] = elt
            end
          end
        else
          -- Plain list: one schema element per positional argument
          for i,selt in ipairs(schema) do
            local res,err = selt:transform(args[i])

            if err then
              logger.errx(cfg, 'invalid arguments for %s: %s', name, err)
              return false
            else
              args[i] = res
            end
          end
        end
      end

      return true
    end

    -- Output AST format is the following:
    -- table of individual selectors
    -- each selector: list of functions
    -- each function: function name + optional list of arguments
    for _,sel in ipairs(parsed) do
      local res = {
        selector = {},
        processor_pipe = {},
      }

      local selector_tbl = sel[1]
      if not selector_tbl then
        logger.errx(cfg, 'no selector represented')
        return nil
      end
      if not extractors[selector_tbl[1]] then
        logger.errx(cfg, 'selector %s is unknown', selector_tbl[1])
        return nil
      end

      res.selector = lua_util.shallowcopy(extractors[selector_tbl[1]])
      res.selector.name = selector_tbl[1]
      -- A fresh table is used when no arguments are given: check_args writes
      -- into it, so the shared empty table `E` must not be passed there
      res.selector.args = selector_tbl[2] or {}

      if not check_args(res.selector.name,
          res.selector.args_schema,
          res.selector.args) then
        return nil
      end

      lua_util.debugm(M, cfg, 'processed selector %s, args: %s',
          res.selector.name, res.selector.args)

      -- fun.each cannot be interrupted, so failures are recorded in this flag
      local pipeline_error = false
      -- Now process processors pipe
      fun.each(function(proc_tbl)
        local proc_name = proc_tbl[1]

        if proc_name:match('^__') then
          -- Special case - method
          local method_name = proc_name:match('^__(.*)$')
          local processor = {
            name = method_name,
            method = true,
            args = proc_tbl[2] or {},
            types = {
              userdata = true,
              table = true,
            },
            map_type = 'string',
            process = function(inp, t, args)
              if t == 'userdata' then
                return inp[method_name](inp, args),'string'
              else
                -- Table
                return inp[method_name],'string'
              end
            end,
          }
          lua_util.debugm(M, cfg, 'attached method %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        else

          if not transform_function[proc_name] then
            logger.errx(cfg, 'processor %s is unknown', proc_name)
            pipeline_error = true
            return nil
          end
          local processor = lua_util.shallowcopy(transform_function[proc_name])
          processor.name = proc_name
          processor.args = proc_tbl[2] or {}

          if not check_args(processor.name, processor.args_schema, processor.args) then
            pipeline_error = true
            return nil
          end

          lua_util.debugm(M, cfg, 'attached processor %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        end
      end, fun.tail(sel))

      if pipeline_error then
        logger.errx(cfg, 'unknown or invalid processor used, exiting')
        return nil
      end

      table.insert(output, res)
    end

    return output
  end
  --[[[
  -- @function lua_selectors.register_extractor(cfg, name, selector)
  -- Registers an extractor definition under `name` in the extractors registry.
  -- If an extractor with the same name already exists it is replaced and a
  -- warning is logged.
  -- @param cfg configuration object, used here only as the logging context
  -- @param {string} name name the extractor is registered under
  -- @param {table} selector extractor definition; must have a `get_value` field
  -- @return {boolean} true when registered, false when `selector` is not a
  -- table or lacks `get_value`
  --]]
  exports.register_extractor = function(cfg, name, selector)
    -- Reject nil/non-table definitions explicitly: indexing them would raise
    -- instead of reporting a bad selector to the caller
    if type(selector) ~= 'table' or not selector.get_value then
      logger.errx(cfg, 'bad selector %s', name)
      return false
    end

    if extractors[name] then
      logger.warnx(cfg, 'redefining selector %s', name)
    end
    extractors[name] = selector

    return true
  end
  --[[[
  -- @function lua_selectors.register_transform(cfg, name, transform)
  -- Registers a transform (processor) definition under `name` in the
  -- transform functions registry. If a transform with the same name already
  -- exists it is replaced and a warning is logged.
  -- @param cfg configuration object, used here only as the logging context
  -- @param {string} name name the transform is registered under
  -- @param {table} transform transform definition; must have both `process`
  -- and `types` fields
  -- @return {boolean} true when registered, false when `transform` is not a
  -- table or lacks one of the required fields
  --]]
  exports.register_transform = function(cfg, name, transform)
    -- Reject nil/non-table definitions explicitly: indexing them would raise
    -- instead of reporting a bad transform to the caller
    local valid = type(transform) == 'table'
        and transform.process
        and transform.types

    if not valid then
      logger.errx(cfg, 'bad transform function %s', name)
      return false
    end

    if transform_function[name] then
      logger.warnx(cfg, 'redefining transform function %s', name)
    end
    transform_function[name] = transform

    return true
  end
  --[[[
  -- @function lua_selectors.process_selectors(task, selectors_pipe)
  -- Runs `process_selector` for every element of `selectors_pipe`, in order,
  -- and collects the results.
  -- @param task value passed through to each `process_selector` call
  -- @param {table} selectors_pipe array of selectors to evaluate
  -- @return {table|nil} array with one result per selector, or nil as soon as
  -- any selector produces a nil/false result
  --]]
  exports.process_selectors = function(task, selectors_pipe)
    local results = {}

    for _, selector in ipairs(selectors_pipe) do
      local value = process_selector(task, selector)

      if value then
        results[#results + 1] = value
      else
        -- One missing element invalidates the whole set of selectors
        return nil
      end
    end

    return results
  end
  --[[[
  -- @function lua_selectors.combine_selectors(task, selectors, delimiter)
  -- Joins the values produced by several selectors using `delimiter`.
  --
  -- * If none of the values is a table, the values are concatenated into a
  --   single string (non-string values are converted with `tostring`).
  -- * If at least one value is a table, scalar values are repeated for every
  --   table element and an array of strings is returned, e.g.
  --   s:tbl:s -> s:telt1:s + s:telt2:s ...
  --   Tables are zipped together, so the result is as long as the shortest one.
  --
  -- @param _ unused (kept for the `task` position of the signature)
  -- @param {table} selectors array of selector values
  -- @param {string} delimiter optional separator, defaults to ''
  -- @return {string|table|nil} combined value(s); nil when `selectors` is nil/false
  --]]
  exports.combine_selectors = function(_, selectors, delimiter)
    if not delimiter then delimiter = '' end
    if not selectors then return nil end

    local has_tables = false
    for _, s in ipairs(selectors) do
      if type(s) == 'table' then
        has_tables = true
        break
      end
    end

    if not has_tables then
      -- Scalars only. This must not go through the zip below: every scalar is
      -- turned into an infinite generator there, and zipping only infinite
      -- generators never terminates.
      local parts = {}
      for i, s in ipairs(selectors) do
        parts[i] = type(s) == 'string' and s or tostring(s)
      end

      return table.concat(parts, delimiter)
    end

    -- We need to do a spill on each table selector
    -- e.g. s:tbl:s -> s:telt1:s + s:telt2:s ...
    local generators = {}
    local res = {}

    for i, s in ipairs(selectors) do
      if type(s) == 'table' then
        -- Stringify elements so that `table.concat` accepts them
        generators[i] = fun.map(tostring, s)
      elseif type(s) == 'string' then
        generators[i] = fun.duplicate(s)
      else
        generators[i] = fun.duplicate(tostring(s))
      end
    end

    fun.each(function(...)
      table.insert(res, table.concat({...}, delimiter))
    end, fun.zip(lua_util.unpack(generators)))

    return res
  end
  --[[[
  -- @function lua_selectors.create_selector_closure(cfg, selector_str, delimiter='')
  -- Parses `selector_str` once and returns a function that evaluates the
  -- parsed selector against a task.
  -- @param cfg configuration object passed to `parse_selector`
  -- @param {string} selector_str selector expression to parse
  -- @param {string} delimiter passed to `combine_selectors` when joining values
  -- @return {function|nil} `function(task)` returning the combined selector
  -- value (or nil when processing yields nothing); nil if parsing fails
  --]]
  exports.create_selector_closure = function(cfg, selector_str, delimiter)
    local parsed = exports.parse_selector(cfg, selector_str)

    if parsed then
      return function(task)
        local values = exports.process_selectors(task, parsed)

        if not values then
          return nil
        end

        return exports.combine_selectors(nil, values, delimiter)
      end
    end

    return nil
  end
  -- Builds a displayable copy of a registry table: every entry of `tbl` is
  -- mapped to a new table holding only its non-function fields.
  local function display_selectors(tbl)
    -- Predicate for a (key, value) pair: keep everything that is not a function
    local function is_plain_field(_, value)
      return type(value) ~= 'function'
    end

    -- Maps (name, definition) to (name, definition without function fields)
    local function describe(name, definition)
      local fields = fun.filter(is_plain_field, definition)
      return name, fun.tomap(fields)
    end

    return fun.tomap(fun.map(describe, tbl))
  end
  -- Returns the registered extractors in displayable form
  -- (see `display_selectors`: function-valued fields are left out).
  function exports.list_extractors()
    return display_selectors(extractors)
  end
  -- Returns the registered transform functions in displayable form
  -- (see `display_selectors`: function-valued fields are left out).
  function exports.list_transforms()
    return display_selectors(transform_function)
  end
  957. return exports