You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_selectors.lua 33KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180
  1. --[[
  2. Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. -- This module contains 'selectors' implementation: code to extract data
  14. -- from Rspamd tasks and compose those together
  15. --
  16. -- Read more at https://rspamd.com/doc/configuration/selectors.html
  17. --[[[
  18. -- @module lua_selectors
  19. -- This module contains 'selectors' implementation: code to extract data
  20. -- from Rspamd tasks and compose those together.
  21. -- Typical selector looks like this: header(User).lower.substring(1, 2):ip
  22. --]]
  -- Public module table returned to callers of this module.
  local exports = {
    -- Defined for selectors maps, must be indexed by name
    maps = {},
  }
  26. local logger = require 'rspamd_logger'
  27. local fun = require 'fun'
  28. local lua_util = require "lua_util"
  29. local ts = require("tableshape").types
  -- Module tag passed as the first argument to `lua_util.debugm`
  local M = 'selectors'
  -- Shared empty table used as a fallback in `x or E` expressions
  local E = {}
  -- Extractors: the head element of every selector.
  --
  -- Each entry is a table with the following fields:
  --   * `get_value(task, args)`: returns `value, type_tag`, or nil when nothing
  --     could be extracted. Type tags used here are 'string', 'string_list',
  --     'userdata', 'userdata_list', 'table' and 'table_list'.
  --   * `description`: human readable documentation string.
  --   * `args_schema` (optional): list of tableshape validators, one per
  --     positional argument.
  local extractors = {
    -- Plain id function
    ['id'] = {
      ['get_value'] = function(_, args)
        if args[1] then
          return args[1], 'string'
        end
        return '', 'string'
      end,
      ['description'] = [[Return value from function's argument or an empty string,
For example, `id('Something')` returns a string 'Something']],
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Similar but for making lists
    ['list'] = {
      ['get_value'] = function(_, args)
        if args[1] then
          return fun.map(tostring, args), 'string_list'
        end
        return {}, 'string_list'
      end,
      ['description'] = [[Return a list from function's arguments or an empty list,
For example, `list('foo', 'bar')` returns a list {'foo', 'bar'}]],
    },
    -- Get source IP address
    ['ip'] = {
      ['get_value'] = function(task)
        local ip = task:get_ip()
        if ip and ip:is_valid() then return ip, 'userdata' end
        return nil
      end,
      ['description'] = [[Get source IP address]],
    },
    -- Get MIME from
    ['from'] = {
      ['get_value'] = function(task, args)
        local from = task:get_from(args[1] or 0)
        -- Nil-safe check that the first address has an `addr` field
        if ((from or E)[1] or E).addr then
          return from[1], 'table'
        end
        return nil
      end,
      ['description'] = [[Get MIME or SMTP from (e.g. `from('smtp')` or `from('mime')`,
uses any type by default)]],
    },
    -- Get recipients
    ['rcpts'] = {
      ['get_value'] = function(task, args)
        local rcpts = task:get_recipients(args[1] or 0)
        -- Nil-safe check that the first recipient has an `addr` field
        if ((rcpts or E)[1] or E).addr then
          return rcpts, 'table_list'
        end
        return nil
      end,
      ['description'] = [[Get MIME or SMTP rcpts (e.g. `rcpts('smtp')` or `rcpts('mime')`,
uses any type by default)]],
    },
    -- Get country (ASN module must be executed first)
    ['country'] = {
      ['get_value'] = function(task)
        local country = task:get_mempool():get_variable('country')
        if not country then
          return nil
        else
          return country, 'string'
        end
      end,
      ['description'] = [[Get country (ASN module must be executed first)]],
    },
    -- Get ASN number
    ['asn'] = {
      ['type'] = 'string',
      ['get_value'] = function(task)
        local asn = task:get_mempool():get_variable('asn')
        if not asn then
          return nil
        else
          return asn, 'string'
        end
      end,
      ['description'] = [[Get AS number (ASN module must be executed first)]],
    },
    -- Get authenticated username
    ['user'] = {
      ['get_value'] = function(task)
        local auser = task:get_user()
        if not auser then
          return nil
        else
          return auser, 'string'
        end
      end,
      ['description'] = 'Get authenticated user name',
    },
    -- Get principal recipient
    ['to'] = {
      ['get_value'] = function(task)
        return task:get_principal_recipient(), 'string'
      end,
      ['description'] = 'Get principal recipient',
    },
    -- Get content digest
    ['digest'] = {
      ['get_value'] = function(task)
        return task:get_digest(), 'string'
      end,
      ['description'] = 'Get content digest',
    },
    -- Get list of all attachments digests
    ['attachments'] = {
      ['get_value'] = function(task, args)
        local parts = task:get_parts() or E
        local digests = {}

        if #args > 0 then
          -- Explicit encoding/hash requested: hash the raw parsed content
          local rspamd_cryptobox = require "rspamd_cryptobox_hash"
          local encoding = args[1] or 'hex'
          local ht = args[2] or 'blake2'

          for _,p in ipairs(parts) do
            if p:get_filename() then
              local h = rspamd_cryptobox.create_specific(ht, p:get_content('raw_parsed'))
              -- Scoped per part so that an unexpected encoding can never
              -- re-insert the digest computed for a previous attachment
              local s
              if encoding == 'hex' then
                s = h:hex()
              elseif encoding == 'base32' then
                s = h:base32()
              elseif encoding == 'base64' then
                s = h:base64()
              end
              if s then
                table.insert(digests, s)
              end
            end
          end
        else
          -- No arguments: reuse digests reported by the parts themselves
          for _,p in ipairs(parts) do
            if p:get_filename() then
              table.insert(digests, p:get_digest())
            end
          end
        end

        if #digests > 0 then
          return digests, 'string_list'
        end
        return nil
      end,
      ['description'] = [[Get list of all attachments digests.
The first optional argument is encoding (`hex`, `base32`, `base64`),
the second optional argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
      ['args_schema'] = {ts.one_of{'hex', 'base32', 'base64'}:is_optional(),
                         ts.one_of{'blake2', 'sha256', 'sha1', 'sha512', 'md5'}:is_optional()}
    },
    -- Get all attachments files
    ['files'] = {
      ['get_value'] = function(task)
        local parts = task:get_parts() or E
        local files = {}
        for _,p in ipairs(parts) do
          local fname = p:get_filename()
          if fname then
            table.insert(files, fname)
          end
        end
        if #files > 0 then
          return files, 'string_list'
        end
        return nil
      end,
      ['description'] = 'Get all attachments files',
    },
    -- Get languages for text parts
    ['languages'] = {
      ['get_value'] = function(task)
        local text_parts = task:get_text_parts() or E
        local languages = {}
        for _,p in ipairs(text_parts) do
          local lang = p:get_language()
          if lang then
            table.insert(languages, lang)
          end
        end
        if #languages > 0 then
          return languages, 'string_list'
        end
        return nil
      end,
      ['description'] = 'Get languages for text parts',
    },
    -- Get helo value
    ['helo'] = {
      ['get_value'] = function(task)
        return task:get_helo(), 'string'
      end,
      ['description'] = 'Get helo value',
    },
    -- Get header with the name that is expected as an argument. Returns list of
    -- headers with this name
    ['header'] = {
      ['get_value'] = function(task, args)
        local strong = false
        if args[2] then
          -- The flags argument is searched for the substrings `strong`/`full`
          if args[2]:match('strong') then
            strong = true
          end
          if args[2]:match('full') then
            return task:get_header_full(args[1], strong), 'table_list'
          end
          return task:get_header(args[1], strong), 'string'
        else
          return task:get_header(args[1]), 'string'
        end
      end,
      ['description'] = [[Get header with the name that is expected as an argument.
The optional second argument accepts list of flags:
- `full`: returns all headers with this name with all data (like task:get_header_full())
- `strong`: use case sensitive match when matching header's name]],
      ['args_schema'] = {ts.string,
                         (ts.pattern("strong") + ts.pattern("full")):is_optional()}
    },
    -- Get list of received headers (returns list of tables)
    ['received'] = {
      ['get_value'] = function(task, args)
        local rh = task:get_received_headers()
        if args[1] and rh then
          return fun.map(function(r) return r[args[1]] end, rh), 'string_list'
        end
        return rh, 'table_list'
      end,
      ['description'] = [[Get list of received headers.
If no arguments specified, returns list of tables. Otherwise, selects a specific element,
e.g. `by_hostname`]],
    },
    -- Get all urls
    ['urls'] = {
      ['get_value'] = function(task, args)
        local urls = task:get_urls()
        if args[1] and urls then
          -- args[1] names a method that is invoked on every url object
          return fun.map(function(r) return r[args[1]](r) end, urls), 'string_list'
        end
        return urls, 'userdata_list'
      end,
      ['description'] = [[Get list of all urls.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_tld`]],
    },
    -- Get all emails
    ['emails'] = {
      ['get_value'] = function(task, args)
        local urls = task:get_emails()
        if args[1] and urls then
          -- args[1] names a method that is invoked on every email object
          return fun.map(function(r) return r[args[1]](r) end, urls), 'string_list'
        end
        return urls, 'userdata_list'
      end,
      ['description'] = [[Get list of all emails.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_user`]],
    },
    -- Get specific pool var. The first argument must be variable name,
    -- the second argument is optional and defines the type (string by default)
    ['pool_var'] = {
      ['get_value'] = function(task, args)
        -- Named `var_type` so that the builtin `type()` is not shadowed
        local var_type = args[2] or 'string'
        return task:get_mempool():get_variable(args[1], var_type), (var_type)
      end,
      ['description'] = [[Get specific pool var. The first argument must be variable name,
the second argument is optional and defines the type (string by default)]],
      ['args_schema'] = {ts.string, ts.string:is_optional()}
    },
    -- Get specific HTTP request header. The first argument must be header name.
    ['request_header'] = {
      ['get_value'] = function(task, args)
        local hdr = task:get_request_header(args[1])
        if hdr then
          return tostring(hdr), 'string'
        end
        return nil
      end,
      ['description'] = [[Get specific HTTP request header.
The first argument must be header name.]],
      ['args_schema'] = {ts.string}
    },
    -- Get task date, optionally formatted
    ['time'] = {
      ['get_value'] = function(task, args)
        -- `message` is the default source when no type is given
        local what = args[1] or 'message'
        local dt = task:get_date{format = what, gmt = true}
        if dt then
          if args[2] then
            -- Should be in format !xxx, as dt is in GMT
            return os.date(args[2], dt), 'string'
          end
          return tostring(dt), 'string'
        end
        return nil
      end,
      ['description'] = [[Get task timestamp. The first argument is type:
- `connect`: connection timestamp
- `message`: timestamp as defined by `Date` header (default)
The second argument is optional time format, see [os.date](http://pgl.yoyo.org/luai/i/os.date) description]],
      ['args_schema'] = {ts.one_of{'connect', 'message'}:is_optional(),
                         ts.string:is_optional()}
    }
  }
  -- Returns the element type of a list type tag (`string_list` -> `string`),
  -- or nil when the tag does not end with the `_list` suffix.
  local function pure_type(ltype)
    local element_type = ltype:match('^(.*)_list$')
    return element_type
  end
  -- Transform functions: the processors applied after an extractor.
  --
  -- Each entry is a table with the following fields:
  --   * `types`: set of accepted input type tags; the special key `list`
  --     means the function accepts any `*_list` value as a whole.
  --   * `map_type` (optional): element type produced when the function is
  --     mapped over a list; the pipeline appends `_list` to it.
  --   * `process(inp, t, args)`: returns `value, type_tag` or nil.
  --   * `description`: human readable documentation string.
  --   * `args_schema` (optional): tableshape validator(s) for the arguments.
  --
  -- List inputs may be either plain tables or luafun iterators, hence list
  -- processors must not rely on `#inp` or mutate `inp` in place.
  local transform_function = {
    -- Returns the lowercased string
    ['lower'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _)
        return inp:lower(), 'string'
      end,
      ['description'] = 'Returns the lowercased string',
    },
    -- Returns the first element
    ['first'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t)
        return fun.head(inp), pure_type(t)
      end,
      ['description'] = 'Returns the first element',
    },
    -- Returns the last element
    ['last'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t)
        -- `#inp` is 0 for luafun iterators, so count the elements instead
        local len = fun.length(inp)
        if len == 0 then
          -- fun.nth asserts on a non-positive index
          return nil
        end
        return fun.nth(len, inp), pure_type(t)
      end,
      ['description'] = 'Returns the last element',
    },
    -- Returns the nth element
    ['nth'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.nth(args[1] or 1, inp), pure_type(t)
      end,
      ['description'] = 'Returns the nth element',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Returns the n first elements
    ['take_n'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.take_n(args[1] or 1, inp), t
      end,
      ['description'] = 'Returns the n first elements',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Returns list without the first n elements
    ['drop_n'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.drop_n(args[1] or 1, inp), t
      end,
      ['description'] = 'Returns list without the first n elements',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Joins strings into a single string using separator in the argument
    ['join'] = {
      ['types'] = {
        ['string_list'] = true
      },
      ['process'] = function(inp, _, args)
        return table.concat(fun.totable(inp), args[1] or ''), 'string'
      end,
      ['description'] = 'Joins strings into a single string using separator in the argument',
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Sort strings
    ['sort'] = {
      ['types'] = {
        ['list'] = true
      },
      ['process'] = function(inp, t, _)
        -- Materialise first: table.sort cannot sort a luafun iterator, and
        -- working on a copy leaves the caller's list untouched
        local sorted = fun.totable(inp)
        table.sort(sorted)
        return sorted, t
      end,
      ['description'] = 'Sort strings lexicographically',
    },
    -- Return unique elements based on hashing (can work without sorting)
    ['uniq'] = {
      ['types'] = {
        ['list'] = true
      },
      ['process'] = function(inp, t, _)
        local tmp = {}
        fun.each(function(val)
          tmp[val] = true
        end, inp)
        -- Iterating a hash table yields key/value pairs; keep the keys only
        return fun.map(function(k, _) return k end, tmp), t
      end,
      ['description'] = 'Returns a list of unique elements (using a hash table)',
    },
    -- Create a digest from string or a list of strings
    ['digest'] = {
      ['types'] = {
        ['string'] = true
      },
      -- `process` returns strings, so mapping over a list yields `string_list`
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local hash = require 'rspamd_cryptobox_hash'
        local encoding = args[1] or 'hex'
        local ht = args[2] or 'blake2'
        -- Module-level constructor: must be called with `.`, the hash type
        -- is its first argument (same call form as in the `attachments` extractor)
        local h = hash.create_specific(ht):update(inp)
        local s
        if encoding == 'hex' then
          s = h:hex()
        elseif encoding == 'base32' then
          s = h:base32()
        elseif encoding == 'base64' then
          s = h:base64()
        end
        return s, 'string'
      end,
      ['description'] = [[Create a digest from a string.
The first argument is encoding (`hex`, `base32`, `base64`),
the second argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
      ['args_schema'] = {ts.one_of{'hex', 'base32', 'base64'}:is_optional(),
                         ts.one_of{'blake2', 'sha256', 'sha1', 'sha512', 'md5'}:is_optional()}
    },
    -- Extracts substring
    ['substring'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local start_pos = args[1] or 1
        local end_pos = args[2] or -1
        return inp:sub(start_pos, end_pos), 'string'
      end,
      ['description'] = 'Extracts substring; the first argument is start, the second is the last (like in Lua)',
      ['args_schema'] = {(ts.number + ts.string / tonumber):is_optional(),
                         (ts.number + ts.string / tonumber):is_optional()}
    },
    -- Prepends a string or a strings list
    ['prepend'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local prepend = table.concat(args, '')
        return prepend .. inp, 'string'
      end,
      ['description'] = 'Prepends a string or a strings list',
    },
    -- Appends a string or a strings list
    ['append'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local append = table.concat(args, '')
        return inp .. append, 'string'
      end,
      ['description'] = 'Appends a string or a strings list',
    },
    -- Regexp matching
    ['regexp'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local rspamd_regexp = require "rspamd_regexp"
        local re = rspamd_regexp.create_cached(args[1])
        if not re then
          logger.errx('invalid regexp: %s', args[1])
          return nil
        end
        local res = re:search(inp, false, true)
        if res then
          -- A single result is returned as a plain string, several as a list
          if #res == 1 then
            return res[1], 'string'
          end
          return res, 'string_list'
        end
        return nil
      end,
      ['description'] = 'Regexp matching',
      ['args_schema'] = {ts.string}
    },
    -- Returns a value if it exists in some map (or acts like a `filter` function)
    ['filter_map'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, t, args)
        local map = exports.maps[args[1]]
        if not map then
          logger.errx('invalid map name: %s', args[1])
          return nil
        end
        local res = map:get_key(inp)
        if res then
          return inp, t
        end
        return nil
      end,
      ['description'] = 'Returns a value if it exists in some map (or acts like a `filter` function)',
      ['args_schema'] = {ts.string}
    },
    -- Returns a value from some map corresponding to some key (or acts like a `map` function)
    ['apply_map'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, t, args)
        local map = exports.maps[args[1]]
        if not map then
          logger.errx('invalid map name: %s', args[1])
          return nil
        end
        local res = map:get_key(inp)
        if res then
          return res, t
        end
        return nil
      end,
      ['description'] = 'Returns a value from some map corresponding to some key (or acts like a `map` function)',
      ['args_schema'] = {ts.string}
    },
    -- Drops input value and return values from function's arguments or an empty string
    ['id'] = {
      ['types'] = {
        ['string'] = true,
        ['list'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(_, _, args)
        if args[1] and args[2] then
          return fun.map(tostring, args), 'string_list'
        elseif args[1] then
          return args[1], 'string'
        end
        return '', 'string'
      end,
      ['description'] = 'Drops input value and return values from function\'s arguments or an empty string',
      ['args_schema'] = (ts.string + ts.array_of(ts.string)):is_optional()
    },
    -- Boolean function equal, returns either nil or its input if input equals the argument
    ['equal'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        if inp == args[1] then
          return inp, 'string'
        end
        return nil
      end,
      ['description'] = [[Boolean function equal.
Returns either nil or its argument if input is equal to argument]],
      ['args_schema'] = {ts.string}
    },
    -- Boolean function in, returns either nil or its input if input is in args list
    ['in'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, t, args)
        for _,a in ipairs(args) do if a == inp then return inp, t end end
        return nil
      end,
      ['description'] = [[Boolean function in.
Returns either nil or its input if input is in args list]],
      ['args_schema'] = ts.array_of(ts.string)
    },
    -- Boolean function not in, returns either nil or its input if input is not in args list
    ['not_in'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, t, args)
        for _,a in ipairs(args) do if a == inp then return nil end end
        return inp, t
      end,
      ['description'] = [[Boolean function not in.
Returns either nil or its input if input is not in args list]],
      ['args_schema'] = ts.array_of(ts.string)
    },
    -- Inverses input: empty string becomes the argument, non-empty becomes nil
    ['inverse'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        -- An empty string is truthy in Lua, so emptiness is tested explicitly
        if inp and inp ~= '' then
          return nil
        end
        return (args[1] or 'true'), 'string'
      end,
      ['description'] = [[Inverses input.
Empty string comes the first argument or 'true', non-empty string comes nil]],
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Applies a network mask to an IP address given as a string
    ['ipmask'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local rspamd_ip = require "rspamd_ip"
        -- Non optimal: convert string to an IP address
        local ip = rspamd_ip.from_string(inp)
        if not ip or not ip:is_valid() then
          lua_util.debugm(M, "cannot convert %s to IP", inp)
          return nil
        end
        if ip:get_version() == 4 then
          local mask = tonumber(args[1])
          return ip:apply_mask(mask):to_string(), 'string'
        else
          -- IPv6 takes the second argument or the first one...
          local mask_str = args[2] or args[1]
          local mask = tonumber(mask_str)
          return ip:apply_mask(mask):to_string(), 'string'
        end
      end,
      ['description'] = 'Applies mask to IP address.' ..
          ' The first argument is the mask for IPv4 addresses, the second is the mask for IPv6 addresses.',
      ['args_schema'] = {(ts.number + ts.string / tonumber),
                         (ts.number + ts.string / tonumber):is_optional()}
    },
  }
  -- `match` is an alias for `regexp`
  transform_function.match = transform_function.regexp
  -- Evaluates one parsed selector against a task.
  --
  -- `sel.selector` provides `get_value`, `args` and `name`; the optional
  -- `sel.processor_pipe` is a list of transforms (`name`, `types`, `process`,
  -- `args`, optional `map_type`, optional `method` flag).
  --
  -- Steps:
  --   1. extract the initial value and its type tag;
  --   2. for userdata/table values, apply either the explicit method found at
  --      the head of the pipe or an implicit conversion to string(s);
  --   3. left-fold the remaining pipe over the `{value, type}` pair;
  --   4. coerce the result to a string or to a table of strings.
  --
  -- Returns the final value, or nil when extraction or any pipe step yields
  -- nothing.
  local function process_selector(task, sel)
    -- Types that may leave the pipeline without an implicit conversion
    local function allowed_type(t)
      return t == 'string' or t == 'text' or t == 'string_list' or t == 'text_list'
    end

    -- Converts a single userdata/table value to a string
    local function implicit_tostring(t, ud_or_table)
      if t ~= 'table' then
        return tostring(ud_or_table), 'string'
      end

      -- Table (very special)
      if ud_or_table.value then
        return ud_or_table.value, 'string'
      end
      if ud_or_table.addr then
        return ud_or_table.addr, 'string'
      end

      return logger.slog("%s", ud_or_table), 'string'
    end

    -- Lazily maps `convert` over `list`, excluding empty elements
    local function map_non_empty(convert, list)
      return fun.filter(function(map_elt) return map_elt end,
          fun.map(convert, list))
    end

    local input, etype = sel.selector.get_value(task, sel.selector.args)

    if not input then
      lua_util.debugm(M, task, 'no value extracted for %s', sel.selector.name)
      return nil
    end

    lua_util.debugm(M, task, 'extracted %s, type %s',
        sel.selector.name, etype)

    local pipe = sel.processor_pipe or E

    if etype:match('^userdata') or etype:match('^table') then
      -- Apply userdata conversion first
      local head = pipe[1]
      local elt_type = pure_type(etype)

      if head and head.method then
        -- Explicit conversion
        if head.types[etype] then
          lua_util.debugm(M, task, 'apply method `%s` to %s',
              head.name, etype)
          input, etype = head.process(input, etype)
        elseif head.types[elt_type] then
          lua_util.debugm(M, task, 'map method `%s` to list of %s',
              head.name, elt_type)
          -- Map method to a list of inputs, excluding empty elements
          input = map_non_empty(function(list_elt)
            return (head.process(list_elt, elt_type))
          end, input)
          etype = 'string_list'
        end

        -- Remove method from the pipeline
        pipe = fun.drop_n(1, pipe)
      elseif not elt_type then
        -- Implicit conversion of a scalar
        lua_util.debugm(M, task, 'apply implicit conversion %s->string', etype)
        input = implicit_tostring(etype, input)
        etype = 'string'
      else
        -- Implicit conversion of every list element
        lua_util.debugm(M, task, 'apply implicit map %s->string', elt_type)
        input = map_non_empty(function(list_elt)
          return (implicit_tostring(elt_type, list_elt))
        end, input)
        etype = 'string_list'
      end
    end

    -- Now we fold elements using left fold
    local function fold_function(acc, x)
      if acc == nil or acc[1] == nil then
        lua_util.debugm(M, task, 'do not apply %s, accumulator is nil', x.name)
        return nil
      end

      local value, t = acc[1], acc[2]

      if x.types[t] then
        -- The transform accepts this exact type
        lua_util.debugm(M, task, 'apply %s to %s', x.name, t)
        return {x.process(value, t, x.args)}
      end

      local pt = pure_type(t)

      if pt and x.types['list'] then
        -- Generic list processor
        lua_util.debugm(M, task, 'apply list function `%s` to %s', x.name, t)
        return {x.process(value, t, x.args)}
      end

      if pt and x.map_type and x.types[pt] then
        local map_type = x.map_type .. '_list'
        lua_util.debugm(M, task, 'map `%s` to list of %s resulting %s',
            x.name, pt, map_type)
        -- Apply map, filtering empty values
        local mapped = map_non_empty(function(list_elt)
          if not list_elt then return nil end
          return (x.process(list_elt, pt, x.args))
        end, value)

        return {mapped, map_type}
      end

      logger.errx(task, 'cannot apply transform %s for type %s', x.name, t)
      return nil
    end

    local res = fun.foldl(fold_function, {input, etype}, pipe)

    if not res or not res[1] then return nil end -- Pipeline failed

    if not allowed_type(res[2]) then
      -- Search for implicit conversion
      local pt = pure_type(res[2])

      if pt then
        lua_util.debugm(M, task, 'apply implicit map %s->string_list', pt)
        res[1] = fun.map(function(e) return implicit_tostring(pt, e) end, res[1])
        res[2] = 'string_list'
      else
        res[1] = implicit_tostring(res[2], res[1])
        res[2] = 'string'
      end
    end

    if pure_type(res[2]) then
      -- Convert to table as it might have a functional form
      res[1] = fun.totable(res[1])
    end

    lua_util.debugm(M, task, 'final selector type: %s, value: %s', res[2], res[1])

    return res[1]
  end
  -- Builds the LPeg grammar for selector expressions.
  --
  -- Shape of an expression, as encoded by the rules below:
  --   function[(args)] [:method[(args)]] {.processor[(args)]}
  -- Several expressions may be chained with `;` or `*`. Every function,
  -- method and processor is captured as a table `{name, {args...}}`; method
  -- names are captured with a `__` prefix.
  local function make_grammar()
    local l = require "lpeg"
    local P, S, R, C, Ct, V = l.P, l.S, l.R, l.C, l.Ct, l.V

    local spc = S(" \t\n")^0
    local atom = C((R("az") + R("AZ") + R("09") + S("_-"))^1)
    -- Quoted strings allow backslash escapes and must not span lines
    local singlequoted_string = P "'" * C(((1 - S "'\r\n\f\\") + (P'\\' * 1))^0) * "'"
    local doublequoted_string = P '"' * C(((1 - S'"\r\n\f\\') + (P'\\' * 1))^0) * '"'
    local argument = atom + singlequoted_string + doublequoted_string
    local dot = P(".")
    local colon = P(":")
    local obrace = "(" * spc
    local ebrace = spc * ")"
    local comma = spc * "," * spc
    local sel_separator = spc * S";*" * spc
    -- Optional whitespace followed by any number of parenthesised argument lists
    local call_tail = spc * (obrace * V("ARG_LIST") * ebrace)^0
    local method_name = atom / function(e) return '__' .. e end

    return P{
      "LIST";
      LIST = Ct(V("EXPR")) * (sel_separator * Ct(V("EXPR")))^0,
      EXPR = V("FUNCTION") * (colon * V("METHOD"))^-1 * (dot * V("PROCESSOR"))^0,
      PROCESSOR = Ct(atom * call_tail),
      FUNCTION = Ct(atom * call_tail),
      METHOD = Ct(method_name * call_tail),
      ARG_LIST = Ct((argument * comma^0)^0)
    }
  end
  -- Compiled once at module load time
  local parser = make_grammar()
  --[[[
  -- @function lua_selectors.parse_selector(cfg, str)
  -- Parses a selector string and resolves every name in it against the
  -- registered extractors and transform functions.
  -- @param cfg object passed to the logging functions
  -- @param {string} str selector expression
  -- @return {table|nil} list with one `{selector = ..., processor_pipe = {...}}`
  --   entry per selector found in `str`; nil if the string does not match the
  --   grammar, names an unknown extractor/processor, or carries arguments
  --   rejected by the corresponding schema
  --]]
  exports.parse_selector = function(cfg, str)
    local parsed = {parser:match(str)}
    -- `parsed` is always a table; it is empty when the grammar did not match
    if not parsed[1] then return nil end

    local output = {}

    -- Validates `args` against `schema` (if any) and replaces the arguments
    -- in place with their transformed values. Returns false on a schema error.
    local function check_args(name, schema, args)
      if schema then
        if getmetatable(schema) then
          -- Schema covers all arguments
          local res,err = schema:transform(args)
          if not res then
            logger.errx(rspamd_config, 'invalid arguments for %s: %s', name, err)
            return false
          else
            for i,elt in ipairs(res) do
              args[i] = elt
            end
          end
        else
          -- List of per-argument schemas
          for i,selt in ipairs(schema) do
            local res,err = selt:transform(args[i])
            if err then
              logger.errx(rspamd_config, 'invalid arguments for %s: %s', name, err)
              return false
            else
              args[i] = res
            end
          end
        end
      end
      return true
    end

    -- Output AST format is the following:
    -- table of individual selectors
    -- each selector: list of functions
    -- each function: function name + optional list of arguments
    for _,sel in ipairs(parsed) do
      local res = {
        selector = {},
        processor_pipe = {},
      }

      local selector_tbl = sel[1]
      if not selector_tbl then
        logger.errx(cfg, 'no selector represented')
        return nil
      end
      if not extractors[selector_tbl[1]] then
        logger.errx(cfg, 'selector %s is unknown', selector_tbl[1])
        return nil
      end

      res.selector = lua_util.shallowcopy(extractors[selector_tbl[1]])
      res.selector.name = selector_tbl[1]
      -- A fresh table is used when no arguments were given: check_args writes
      -- into the argument table, so a shared default must not be used here
      res.selector.args = selector_tbl[2] or {}

      if not check_args(res.selector.name,
          res.selector.args_schema,
          res.selector.args) then
        return nil
      end

      lua_util.debugm(M, cfg, 'processed selector %s, args: %s',
          res.selector.name, res.selector.args)

      local pipeline_error = false
      -- Now process processors pipe
      fun.each(function(proc_tbl)
        local proc_name = proc_tbl[1]

        if proc_name:match('^__') then
          -- Special case - method
          local method_name = proc_name:match('^__(.*)$')
          local processor = {
            name = method_name,
            method = true,
            args = proc_tbl[2] or {},
            types = {
              userdata = true,
              table = true,
            },
            map_type = 'string',
            process = function(inp, t, args)
              if t == 'userdata' then
                -- Pass the parsed arguments positionally; the parentheses
                -- keep only the first value returned by the method
                return (inp[method_name](inp, lua_util.unpack(args or {}))),'string'
              else
                -- Table: plain field access
                return inp[method_name],'string'
              end
            end,
          }
          lua_util.debugm(M, cfg, 'attached method %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        else
          if not transform_function[proc_name] then
            logger.errx(cfg, 'processor %s is unknown', proc_name)
            pipeline_error = true
            return nil
          end

          local processor = lua_util.shallowcopy(transform_function[proc_name])
          processor.name = proc_name
          processor.args = proc_tbl[2] or {}

          if not check_args(processor.name, processor.args_schema, processor.args) then
            pipeline_error = true
            return nil
          end

          lua_util.debugm(M, cfg, 'attached processor %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        end
      end, fun.tail(sel))

      if pipeline_error then
        logger.errx(cfg, 'unknown or invalid processor used, exiting')
        return nil
      end

      table.insert(output, res)
    end

    return output
  end
  --[[[
  -- @function lua_selectors.register_extractor(cfg, name, selector)
  -- Registers an extractor under `name`, replacing (with a warning) any
  -- extractor already registered under that name.
  -- @param cfg object passed to the logging functions
  -- @param {string} name extractor name
  -- @param {table} selector extractor definition; must have a `get_value` field
  -- @return {boolean} true if registered, false if `selector` is not acceptable
  --]]
  exports.register_extractor = function(cfg, name, selector)
    -- The type check makes nil/non-table input take the 'bad selector' path
    -- instead of raising on the field access
    if type(selector) == 'table' and selector.get_value then
      if extractors[name] then
        logger.warnx(cfg, 'redefining selector %s', name)
      end
      extractors[name] = selector
      return true
    end

    logger.errx(cfg, 'bad selector %s', name)
    return false
  end
  --[[[
  -- @function lua_selectors.register_transform(cfg, name, transform)
  -- Registers a transform function under `name`, replacing (with a warning)
  -- any transform already registered under that name.
  -- @param cfg object passed to the logging functions
  -- @param {string} name transform name
  -- @param {table} transform transform definition; must have `process` and
  --   `types` fields
  -- @return {boolean} true if registered, false if `transform` is not acceptable
  --]]
  exports.register_transform = function(cfg, name, transform)
    -- The type check makes nil/non-table input take the 'bad transform' path
    -- instead of raising on the field access
    if type(transform) == 'table' and transform.process and transform.types then
      if transform_function[name] then
        logger.warnx(cfg, 'redefining transform function %s', name)
      end
      transform_function[name] = transform
      return true
    end

    logger.errx(cfg, 'bad transform function %s', name)
    return false
  end
  --[[[
  -- @function lua_selectors.process_selectors(task, selectors_pipe)
  -- Runs every parsed selector from `selectors_pipe` against `task`.
  -- @param task object handed over to each selector
  -- @param {table} selectors_pipe list of parsed selectors
  -- @return {table|nil} list of the selector results in the same order, or nil
  --   as soon as one selector yields nothing
  --]]
  exports.process_selectors = function(task, selectors_pipe)
    local results = {}

    for _, parsed_sel in ipairs(selectors_pipe) do
      local value = process_selector(task, parsed_sel)
      if value then
        results[#results + 1] = value
      else
        -- One empty selector invalidates the whole set
        return nil
      end
    end

    return results
  end
  --[[[
  -- @function lua_selectors.combine_selectors(task, selectors, delimiter)
  -- Joins selector results using `delimiter`.
  -- @param _ unused
  -- @param {table} selectors list of selector results
  -- @param {string} delimiter separator placed between values (default: '')
  -- @return {string|table|nil} nil when `selectors` is nil; a single string
  --   when no element needs to be iterated (strings and userdata only,
  --   userdata being converted with tostring); otherwise a list of strings,
  --   one per step of zipping the list elements together, with scalar elements
  --   repeated on every step
  --]]
  exports.combine_selectors = function(_, selectors, delimiter)
    if not delimiter then delimiter = '' end
    if not selectors then return nil end

    local all_strings = fun.all(function(s) return type(s) == 'string' end, selectors)
    if all_strings then
      return table.concat(selectors, delimiter)
    end

    -- Normalise scalars to strings and find out whether anything has to be
    -- iterated over
    local elts = {}
    local has_list = false
    for i,s in ipairs(selectors) do
      local st = type(s)
      if st == 'string' then
        elts[i] = s
      elseif st == 'userdata' then
        elts[i] = tostring(s)
      else
        elts[i] = s
        has_list = true
      end
    end

    if not has_list then
      -- Scalars only: zipping endless repetitions of them would never
      -- terminate, so join them directly
      return table.concat(elts, delimiter)
    end

    -- We need to do a spill on each table selector
    -- e.g. s:tbl:s -> s:telt1:s + s:telt2:s ...
    local tbl = {}
    local res = {}
    for i,s in ipairs(elts) do
      if type(s) == 'string' then
        -- Repeat the scalar for every element of the list selectors
        tbl[i] = fun.duplicate(s)
      else
        tbl[i] = s
      end
    end

    fun.each(function(...)
      table.insert(res, table.concat({...}, delimiter))
    end, fun.zip(lua_util.unpack(tbl)))

    return res
  end
  --[[[
  -- @function lua_selectors.create_selector_closure(cfg, selector_str, delimiter='')
  -- Parses `selector_str` once and returns a function that evaluates it.
  -- @param cfg object passed to parse_selector
  -- @param {string} selector_str selector expression
  -- @param {string} delimiter separator passed to combine_selectors
  -- @return {function|nil} `function(task)` returning the combined selector
  --   value (nil if the selectors yield nothing); nil if the selector string
  --   cannot be parsed
  --]]
  exports.create_selector_closure = function(cfg, selector_str, delimiter)
    local parsed_selector = exports.parse_selector(cfg, selector_str)

    if parsed_selector then
      return function(task)
        local values = exports.process_selectors(task, parsed_selector)
        if not values then
          return nil
        end
        return exports.combine_selectors(nil, values, delimiter)
      end
    end

    return nil
  end
  -- Returns a copy of the registry `tbl` (name -> definition) in which every
  -- definition keeps only its non-function fields
  local function display_selectors(tbl)
    local function is_not_function(_, field)
      return type(field) ~= 'function'
    end

    local function strip_functions(name, definition)
      return name, fun.tomap(fun.filter(is_not_function, definition))
    end

    return fun.tomap(fun.map(strip_functions, tbl))
  end
  -- Returns the registered extractors with their function fields stripped
  function exports.list_extractors()
    return display_selectors(extractors)
  end
  -- Returns the registered transform functions with their function fields
  -- stripped
  function exports.list_transforms()
    return display_selectors(transform_function)
  end

  return exports