You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_selectors.lua 29KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034
  1. --[[
  2. Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. -- This module contains 'selectors' implementation: code to extract data
  14. -- from Rspamd tasks and compose those together
  15. --
  16. -- Read more at https://rspamd.com/doc/configuration/selectors.html
  17. --[[[
  18. -- @module lua_selectors
  19. -- This module contains 'selectors' implementation: code to extract data
  20. -- from Rspamd tasks and compose those together.
  21. -- Typical selector looks like this: header(User).lower.substring(1, 2):ip
  22. --]]
  23. local exports = {}
  24. local logger = require 'rspamd_logger'
  25. local fun = require 'fun'
  26. local lua_util = require "lua_util"
  27. local ts = require("tableshape").types
  28. local M = "selectors"
  29. local E = {}
  -- Registry of extractors, keyed by the selector name used in expressions.
  -- Every entry is a table with:
  --   get_value(task, args) -> value, type_name   (nil when nothing is available)
  --   description                                 (help text shown by list_extractors)
  --   args_schema                                 (optional tableshape schema for args)
  -- Type names produced here: 'string', 'userdata', 'table' and their
  -- `*_list` counterparts.
  local extractors = {
    -- Returns its own first argument; does not look at the task at all
    ['id'] = {
      ['get_value'] = function(_, args)
        if args[1] then
          return args[1], 'string'
        end

        return '','string'
      end,
      ['description'] = [[Return value from function's argument or an empty string,
For example, `id('Something')` returns a string 'Something']],
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Get source IP address
    ['ip'] = {
      ['get_value'] = function(task)
        local ip = task:get_ip()
        if ip and ip:is_valid() then return ip,'userdata' end
        return nil
      end,
      ['description'] = [[Get source IP address]],
    },
    -- Get MIME from
    ['from'] = {
      ['get_value'] = function(task, args)
        local from = task:get_from(args[1] or 0)
        -- `E` (empty table) guards keep the chained lookup nil-safe
        if ((from or E)[1] or E).addr then
          return from[1],'table'
        end
        return nil
      end,
      ['description'] = [[Get MIME or SMTP from (e.g. `from('smtp')` or `from('mime')`,
uses any type by default)]],
    },
    -- Get list of recipients
    ['rcpts'] = {
      ['get_value'] = function(task, args)
        local rcpts = task:get_recipients(args[1] or 0)
        if ((rcpts or E)[1] or E).addr then
          return rcpts,'table_list'
        end
        return nil
      end,
      ['description'] = [[Get MIME or SMTP rcpts (e.g. `rcpts('smtp')` or `rcpts('mime')`,
uses any type by default)]],
    },
    -- Get country (ASN module must be executed first)
    ['country'] = {
      ['get_value'] = function(task)
        local country = task:get_mempool():get_variable('country')
        if not country then
          return nil
        else
          return country,'string'
        end
      end,
      ['description'] = [[Get country (ASN module must be executed first)]],
    },
    -- Get ASN number
    ['asn'] = {
      ['type'] = 'string',
      ['get_value'] = function(task)
        local asn = task:get_mempool():get_variable('asn')
        if not asn then
          return nil
        else
          return asn,'string'
        end
      end,
      ['description'] = [[Get AS number (ASN module must be executed first)]],
    },
    -- Get authenticated username
    ['user'] = {
      ['get_value'] = function(task)
        local auser = task:get_user()
        if not auser then
          return nil
        else
          return auser,'string'
        end
      end,
      ['description'] = 'Get authenticated user name',
    },
    -- Get principal recipient
    ['to'] = {
      ['get_value'] = function(task)
        return task:get_principal_recipient(),'string'
      end,
      ['description'] = 'Get principal recipient',
    },
    -- Get content digest
    ['digest'] = {
      ['get_value'] = function(task)
        return task:get_digest(),'string'
      end,
      ['description'] = 'Get content digest',
    },
    -- Get list of all attachments digests
    ['attachments'] = {
      ['get_value'] = function(task, args)
        local parts = task:get_parts() or E
        local digests = {}

        if #args > 0 then
          -- Explicit encoding/hash requested: hash the parsed content ourselves
          local rspamd_cryptobox = require "rspamd_cryptobox_hash"
          local encoding = args[1] or 'hex'
          local ht = args[2] or 'blake2'

          for _,p in ipairs(parts) do
            if p:get_filename() then
              local h = rspamd_cryptobox.create_specific(ht, p:get_content('raw_parsed'))
              -- Scoped per part so a digest of a previous part can never leak
              local s
              if encoding == 'hex' then
                s = h:hex()
              elseif encoding == 'base32' then
                s = h:base32()
              elseif encoding == 'base64' then
                s = h:base64()
              end
              table.insert(digests, s)
            end
          end
        else
          -- No arguments: use the digest already attached to every part
          for _,p in ipairs(parts) do
            if p:get_filename() then
              table.insert(digests, p:get_digest())
            end
          end
        end

        if #digests > 0 then
          return digests,'string_list'
        end

        return nil
      end,
      ['description'] = [[Get list of all attachments digests.
The first optional argument is encoding (`hex`, `base32`, `base64`),
the second optional argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
      ['args_schema'] = {ts.one_of{'hex', 'base32', 'base64'}:is_optional(),
                         ts.one_of{'blake2', 'sha256', 'sha1', 'sha512', 'md5'}:is_optional()}
    },
    -- Get all attachments files
    ['files'] = {
      ['get_value'] = function(task)
        local parts = task:get_parts() or E
        local files = {}

        for _,p in ipairs(parts) do
          local fname = p:get_filename()
          if fname then
            table.insert(files, fname)
          end
        end

        if #files > 0 then
          return files,'string_list'
        end

        return nil
      end,
      ['description'] = 'Get all attachments files',
    },
    -- Get helo value
    ['helo'] = {
      ['get_value'] = function(task)
        return task:get_helo(),'string'
      end,
      ['description'] = 'Get helo value',
    },
    -- Get header with the name that is expected as an argument. Returns list of
    -- headers with this name
    ['header'] = {
      ['get_value'] = function(task, args)
        local strong = false
        if args[2] then
          -- The second argument is a free-form flags string
          if args[2]:match('strong') then
            strong = true
          end

          if args[2]:match('full') then
            return task:get_header_full(args[1], strong),'table_list'
          end

          return task:get_header(args[1], strong),'string'
        else
          return task:get_header(args[1]),'string'
        end
      end,
      ['description'] = [[Get header with the name that is expected as an argument.
The optional second argument accepts list of flags:
- `full`: returns all headers with this name with all data (like task:get_header_full())
- `strong`: use case sensitive match when matching header's name]],
      ['args_schema'] = {ts.string,
                         (ts.pattern("strong") + ts.pattern("full")):is_optional()}
    },
    -- Get list of received headers (returns list of tables)
    ['received'] = {
      ['get_value'] = function(task, args)
        local rh = task:get_received_headers()
        if args[1] and rh then
          -- Project a single field out of every received header
          return fun.map(function(r) return r[args[1]] end, rh), 'string_list'
        end

        return rh,'table_list'
      end,
      ['description'] = [[Get list of received headers.
If no arguments specified, returns list of tables. Otherwise, selects a specific element,
e.g. `by_hostname`]],
    },
    -- Get all urls
    ['urls'] = {
      ['get_value'] = function(task, args)
        local urls = task:get_urls()
        if args[1] and urls then
          -- args[1] names a method that is invoked on every url object
          return fun.map(function(r) return r[args[1]](r) end, urls), 'string_list'
        end
        return urls,'userdata_list'
      end,
      ['description'] = [[Get list of all urls.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_tld`]],
    },
    -- Get all emails
    ['emails'] = {
      ['get_value'] = function(task, args)
        local urls = task:get_emails()
        if args[1] and urls then
          return fun.map(function(r) return r[args[1]](r) end, urls), 'string_list'
        end
        return urls,'userdata_list'
      end,
      ['description'] = [[Get list of all emails.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_user`]],
    },
    -- Get specific pool var. The first argument must be variable name,
    -- the second argument is optional and defines the type (string by default)
    ['pool_var'] = {
      ['get_value'] = function(task, args)
        -- Named `var_type` so the builtin `type` function is not shadowed
        local var_type = args[2] or 'string'
        return task:get_mempool():get_variable(args[1], var_type),(var_type)
      end,
      ['description'] = [[Get specific pool var. The first argument must be variable name,
the second argument is optional and defines the type (string by default)]],
      ['args_schema'] = {ts.string, ts.string:is_optional()}
    },
    -- Get specific HTTP request header. The first argument must be header name.
    ['request_header'] = {
      ['get_value'] = function(task, args)
        local hdr = task:get_request_header(args[1])
        if hdr then
          return tostring(hdr),'string'
        end

        return nil
      end,
      ['description'] = [[Get specific HTTP request header.
The first argument must be header name.]],
      ['args_schema'] = {ts.string}
    },
    -- Get task date, optionally formatted
    ['time'] = {
      ['get_value'] = function(task, args)
        -- The timestamp source defaults to the `Date` header of the message
        local what = args[1] or 'message'
        local dt = task:get_date{format = what, gmt = true}

        if dt then
          if args[2] then
            -- Should be in format !xxx, as dt is in GMT
            return os.date(args[2], dt),'string'
          end

          return tostring(dt),'string'
        end

        return nil
      end,
      ['description'] = [[Get task timestamp. The first argument is type:
- `connect`: connection timestamp
- `message`: timestamp as defined by `Date` header (default)
The second argument is optional time format, see [os.date](http://pgl.yoyo.org/luai/i/os.date) description]],
      ['args_schema'] = {ts.one_of{'connect', 'message'}:is_optional(),
                         ts.string:is_optional()}
    }
  }
  -- Maps a list type name to its element type name: 'string_list' -> 'string'.
  -- Returns nil when `ltype` does not end with the `_list` suffix.
  local function pure_type(ltype)
    local element_type = ltype:match('^(.*)_list$')
    return element_type
  end
  -- Registry of transform functions (the `.name(args)` pipeline stages).
  -- Every entry is a table with:
  --   types        set of accepted input types; the pseudo type `list`
  --                accepts any `*_list` input as a whole
  --   map_type     (optional) element type produced when the function is
  --                mapped over a list of accepted scalars
  --   process(inp, type, args) -> value, type_name
  --   description  help text shown by list_transforms
  --   args_schema  (optional) tableshape schema for the arguments
  local transform_function = {
    -- Returns the lowercased string
    ['lower'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _)
        return inp:lower(),'string'
      end,
      ['description'] = 'Returns the lowercased string',
    },
    -- Returns the first element
    ['first'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t)
        return fun.head(inp),pure_type(t)
      end,
      ['description'] = 'Returns the first element',
    },
    -- Returns the last element
    ['last'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t)
        -- `inp` may be a lazy luafun iterator (the result of a mapped
        -- transform), where `#inp` is not the number of elements, so the
        -- list is materialised first. An empty list yields nil, which
        -- terminates the pipeline instead of raising.
        local items = fun.totable(inp)
        return items[#items],pure_type(t)
      end,
      ['description'] = 'Returns the last element',
    },
    -- Returns the nth element
    ['nth'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.nth(args[1] or 1, inp),pure_type(t)
      end,
      ['description'] = 'Returns the nth element',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Returns the n first elements
    ['take_n'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.take_n(args[1] or 1, inp),t
      end,
      ['description'] = 'Returns the n first elements',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Returns list without the first n elements
    ['drop_n'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.drop_n(args[1] or 1, inp),t
      end,
      ['description'] = 'Returns list without the first n elements',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Joins strings into a single string using separator in the argument
    ['join'] = {
      ['types'] = {
        ['string_list'] = true
      },
      ['process'] = function(inp, _, args)
        return table.concat(fun.totable(inp), args[1] or ''), 'string'
      end,
      ['description'] = 'Joins strings into a single string using separator in the argument',
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Create a digest from string or a list of strings
    ['digest'] = {
      ['types'] = {
        ['string'] = true
      },
      -- The function returns encoded strings, so a mapped digest must be a
      -- `string_list` to stay chainable with `join`, `lower` and friends
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local hash = require 'rspamd_cryptobox_hash'
        local encoding = args[1] or 'hex'
        local ht = args[2] or 'blake2'
        -- Plain function call (dot, not colon): the first argument must be
        -- the hash type, exactly as in the `attachments` extractor
        local h = hash.create_specific(ht, inp)
        local s

        if encoding == 'hex' then
          s = h:hex()
        elseif encoding == 'base32' then
          s = h:base32()
        elseif encoding == 'base64' then
          s = h:base64()
        end

        return s,'string'
      end,
      ['description'] = [[Create a digest from a string.
The first argument is encoding (`hex`, `base32`, `base64`),
the second argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
      ['args_schema'] = {ts.one_of{'hex', 'base32', 'base64'}:is_optional(),
                         ts.one_of{'blake2', 'sha256', 'sha1', 'sha512', 'md5'}:is_optional()}
    },
    -- Extracts substring
    ['substring'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local start_pos = args[1] or 1
        local end_pos = args[2] or -1

        return inp:sub(start_pos, end_pos), 'string'
      end,
      ['description'] = 'Extracts substring; the first argument is start, the second is the last (like in Lua)',
      ['args_schema'] = {(ts.number + ts.string / tonumber):is_optional(),
                         (ts.number + ts.string / tonumber):is_optional()}
    },
    -- Regexp matching
    ['regexp'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local rspamd_regexp = require "rspamd_regexp"

        local re = rspamd_regexp.create_cached(args[1])

        if not re then
          logger.errx('invalid regexp: %s', args[1])
          return nil
        end

        local res = re:search(inp, false, true)

        if res then
          -- A single result is returned as a scalar, several as a list
          if #res == 1 then
            return res[1],'string'
          end

          return res,'string_list'
        end

        return nil
      end,
      ['description'] = 'Regexp matching',
      ['args_schema'] = {ts.string}
    },
    -- Drops input value and return values from function's arguments or an empty string
    ['id'] = {
      ['types'] = {
        ['string'] = true,
        ['list'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(_, _, args)
        if args[1] and args[2] then
          return fun.map(tostring, args),'string_list'
        elseif args[1] then
          return args[1],'string'
        end

        return '','string'
      end,
      ['description'] = 'Drops input value and return values from function\'s arguments or an empty string',
      ['args_schema'] = (ts.string + ts.array_of(ts.string)):is_optional()
    },
    -- Boolean function equal, returns either nil or its input
    ['equal'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        if inp == args[1] then
          return inp,'string'
        end

        return nil
      end,
      ['description'] = [[Boolean function equal.
Returns either nil or its argument if input is equal to argument]],
      ['args_schema'] = {ts.string}
    },
    -- Boolean function in, returns either nil or its input if input is in args list
    ['in'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, t, args)
        for _,a in ipairs(args) do if a == inp then return inp,t end end
        return nil
      end,
      ['description'] = [[Boolean function in.
Returns either nil or its input if input is in args list]],
      ['args_schema'] = ts.array_of(ts.string)
    },
    -- Boolean function not in, the opposite of `in`
    ['not_in'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, t, args)
        for _,a in ipairs(args) do if a == inp then return nil end end
        return inp,t
      end,
      ['description'] = [[Boolean function not in.
Returns either nil or its input if input is not in args list]],
      ['args_schema'] = ts.array_of(ts.string)
    },
    -- Inverses input
    -- NOTE(review): the selector pipeline appears to skip transforms once the
    -- accumulated value is nil, so the `else` branch may never be reached
    -- from there - confirm against the caller.
    ['inverse'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        if inp then
          return nil
        else
          return (args[1] or 'true'),'string'
        end
      end,
      ['description'] = [[Inverses input.
Empty string comes the first argument or 'true', non-empty string comes nil]],
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Applies a network mask to an IP address given as a string
    ['ipmask'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local rspamd_ip = require "rspamd_ip"
        -- Non optimal: convert string to an IP address
        local ip = rspamd_ip.from_string(inp)

        if not ip or not ip:is_valid() then
          lua_util.debugm(M, "cannot convert %s to IP", inp)
          return nil
        end

        if ip:get_version() == 4 then
          local mask = tonumber(args[1])

          return ip:apply_mask(mask):to_string(),'string'
        else
          -- IPv6 takes the second argument or the first one...
          local mask_str = args[2] or args[1]
          local mask = tonumber(mask_str)

          return ip:apply_mask(mask):to_string(),'string'
        end
      end,
      ['description'] = 'Applies mask to IP address.' ..
        ' The first argument is the mask for IPv4 addresses, the second is the mask for IPv6 addresses.',
      ['args_schema'] = {(ts.number + ts.string / tonumber),
                         (ts.number + ts.string / tonumber):is_optional()}
    },
  }

  -- `match` is an alias of `regexp`
  transform_function.match = transform_function.regexp
  -- Evaluates a single parsed selector against a task.
  --
  -- @param task  task object handed to the extractor
  -- @param sel   one element of the `parse_selector` output:
  --              { selector = <extractor copy with name/args>,
  --                processor_pipe = { <method or transform>, ... } }
  -- @return a string, or a table of strings for list results; nil when the
  --         extractor produced nothing or a pipeline stage failed
  local function process_selector(task, sel)
    -- Only strings/texts and lists of them may leave this function
    local function allowed_type(t)
      if t == 'string' or t == 'text' or t == 'string_list' or t == 'text_list' then
        return true
      end

      return false
    end

    -- Truthy (the element type) for `*_list` types, nil for scalar types
    local function list_type(t)
      return pure_type(t)
    end

    -- Default conversion of a table/userdata element to a string
    local function implicit_tostring(t, ud_or_table)
      if t == 'table' then
        -- Table (very special)
        if ud_or_table.value then
          return ud_or_table.value,'string'
        elseif ud_or_table.addr then
          return ud_or_table.addr,'string'
        end

        return logger.slog("%s", ud_or_table),'string'
      else
        return tostring(ud_or_table),'string'
      end
    end

    local input,etype = sel.selector.get_value(task, sel.selector.args)

    if not input then
      lua_util.debugm(M, task, 'no value extracted for %s', sel.selector.name)
      return nil
    end

    lua_util.debugm(M, task, 'extracted %s, type %s',
        sel.selector.name, etype)

    local pipe = sel.processor_pipe or E

    if etype:match('^userdata') or etype:match('^table') then
      -- Apply userdata conversion first
      local first_elt = pipe[1]

      if first_elt and first_elt.method then
        -- Explicit conversion
        local meth = first_elt

        if meth.types[etype] then
          lua_util.debugm(M, task, 'apply method `%s` to %s',
              meth.name, etype)
          -- Forward the arguments written in the selector expression,
          -- exactly like the generic pipeline does for transforms
          input,etype = meth.process(input, etype, meth.args)
        else
          local pt = pure_type(etype)

          if meth.types[pt] then
            lua_util.debugm(M, task, 'map method `%s` to list of %s',
                meth.name, pt)
            input = fun.map(function(list_elt)
              local ret, _ = meth.process(list_elt, pt, meth.args)
              return ret
            end, input)
            etype = 'string_list'
          end
        end

        -- Remove method from the pipeline
        pipe = fun.drop_n(1, pipe)
      else
        -- Implicit conversion
        local pt = pure_type(etype)

        if not pt then
          lua_util.debugm(M, task, 'apply implicit conversion %s->string', etype)
          -- Only the first returned value (the string) is kept
          input = implicit_tostring(etype, input)
          etype = 'string'
        else
          lua_util.debugm(M, task, 'apply implicit map %s->string', pt)
          input = fun.map(function(list_elt)
            local ret = implicit_tostring(pt, list_elt)
            return ret
          end, input)
          etype = 'string_list'
        end
      end
    end

    -- Now we fold elements using left fold
    -- The accumulator is a `{value, type}` pair; nil aborts the rest of the pipe
    local function fold_function(acc, x)
      if acc == nil or acc[1] == nil then
        lua_util.debugm(M, task, 'do not apply %s, accumulator is nil', x.name)
        return nil
      end

      local value = acc[1]
      local t = acc[2]

      if not x.types[t] then
        local pt = pure_type(t)

        if pt and x.types['list'] then
          -- Generic list processor
          lua_util.debugm(M, task, 'apply list function `%s` to %s', x.name, t)
          return {x.process(value, t, x.args)}
        elseif pt and x.map_type and x.types[pt] then
          -- Scalar processor applied to every element of a list
          local map_type = x.map_type .. '_list'
          lua_util.debugm(M, task, 'map `%s` to list of %s resulting %s',
              x.name, pt, map_type)

          return {fun.map(function(list_elt)
            if not list_elt then return nil end
            local ret, _ = x.process(list_elt, pt, x.args)
            return ret
          end, value), map_type}
        end

        logger.errx(task, 'cannot apply transform %s for type %s', x.name, t)
        return nil
      end

      lua_util.debugm(M, task, 'apply %s to %s', x.name, t)
      return {x.process(value, t, x.args)}
    end

    local res = fun.foldl(fold_function,
        {input, etype},
        pipe)

    if not res or not res[1] then return nil end -- Pipeline failed

    if not allowed_type(res[2]) then
      -- Search for implicit conversion
      local pt = pure_type(res[2])

      if pt then
        lua_util.debugm(M, task, 'apply implicit map %s->string_list', pt)
        res[1] = fun.map(function(e) return implicit_tostring(pt, e) end, res[1])
        res[2] = 'string_list'
      else
        res[1] = implicit_tostring(res[2], res[1])
        res[2] = 'string'
      end
    end

    if list_type(res[2]) then
      -- Convert to table as it might have a functional form
      res[1] = fun.totable(res[1])
    end

    lua_util.debugm(M, task, 'final selector type: %s, value: %s', res[2], res[1])

    return res[1]
  end
  -- Builds the LPeg grammar for selector expressions such as
  -- `header(User).lower.substring(1, 2):ip`.
  -- A successful match yields one capture table per selector; inside it the
  -- first element describes the extractor and the remaining ones the
  -- method/processors, each as `{name, {args...}}`. Method names are
  -- captured with a `__` prefix.
  local function make_grammar()
    local lpeg = require "lpeg"
    local P, S, R, C, Ct, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Ct, lpeg.V

    local spc = S(" \t\n")^0
    local atom = C((R("az") + R("AZ") + R("09") + S("_-"))^1)

    -- Quoted string delimited by `quote`; the capture is the raw inner text
    local function quoted(quote)
      return P(quote) * C(((1 - S(quote .. "\r\n\f\\")) + (P'\\' * 1))^0) * quote
    end

    local argument = atom + quoted("'") + quoted('"')
    local dot = P(".")
    local colon = P(":")
    local obrace = "(" * spc
    local ebrace = spc * ")"
    local comma = spc * "," * spc
    local sel_separator = spc * S";*" * spc

    -- `name` optionally followed by parenthesised argument lists
    local function call_of(name)
      return Ct(name * spc * (obrace * V("ARG_LIST") * ebrace)^0)
    end

    local function mark_method(e)
      return '__' .. e
    end

    return P{
      "LIST";
      LIST = Ct(V("EXPR")) * (sel_separator * Ct(V("EXPR")))^0,
      EXPR = V("FUNCTION") * (colon * V("METHOD"))^-1 * (dot * V("PROCESSOR"))^0,
      PROCESSOR = call_of(atom),
      FUNCTION = call_of(atom),
      METHOD = call_of(atom / mark_method),
      ARG_LIST = Ct((argument * comma^0)^0)
    }
  end

  local parser = make_grammar()
  --[[[
  -- @function lua_selectors.parse_selector(cfg, str)
  -- Parses a selector expression and validates it against the registered
  -- extractors and transform functions.
  -- @param {rspamd_config} cfg configuration object, used for logging
  -- @param {string} str selector expression, e.g. `header(User).lower:ip`
  -- @return {table} list of parsed selectors, each of the form
  --   `{selector = ..., processor_pipe = {...}}`; nil if the string cannot
  --   be parsed, or names an unknown extractor/processor, or has bad arguments
  --]]
  exports.parse_selector = function(cfg, str)
    local parsed = {parser:match(str)}
    local output = {}

    if not parsed or not parsed[1] then return nil end

    -- Validates `args` against `schema` and stores the transformed values
    -- back into `args`. The schema is either a single tableshape object
    -- covering the whole list or a plain list of per-argument schemas.
    local function check_args(name, schema, args)
      if schema then
        if getmetatable(schema) then
          -- Schema covers all arguments
          local res,err = schema:transform(args)
          if not res then
            logger.errx(rspamd_config, 'invalid arguments for %s: %s', name, err)
            return false
          else
            for i,elt in ipairs(res) do
              args[i] = elt
            end
          end
        else
          for i,selt in ipairs(schema) do
            local res,err = selt:transform(args[i])

            if err then
              logger.errx(rspamd_config, 'invalid arguments for %s: %s', name, err)
              return false
            else
              args[i] = res
            end
          end
        end
      end

      return true
    end

    -- Output AST format is the following:
    -- table of individual selectors
    -- each selector: list of functions
    -- each function: function name + optional list of arguments
    for _,sel in ipairs(parsed) do
      local res = {
        selector = {},
        processor_pipe = {},
      }

      local selector_tbl = sel[1]
      if not selector_tbl then
        logger.errx(cfg, 'no selector represented')
        return nil
      end
      if not extractors[selector_tbl[1]] then
        logger.errx(cfg, 'selector %s is unknown', selector_tbl[1])
        return nil
      end

      -- Copy the registry entry: name and args are specific to this use
      res.selector = lua_util.shallowcopy(extractors[selector_tbl[1]])
      res.selector.name = selector_tbl[1]
      res.selector.args = selector_tbl[2] or E

      if not check_args(res.selector.name,
          res.selector.args_schema,
          res.selector.args) then
        return nil
      end

      lua_util.debugm(M, cfg, 'processed selector %s, args: %s',
          res.selector.name, res.selector.args)

      local pipeline_error = false
      -- Now process processors pipe
      fun.each(function(proc_tbl)
        local proc_name = proc_tbl[1]

        if proc_name:match('^__') then
          -- Special case - method
          local method_name = proc_name:match('^__(.*)$')
          local processor = {
            name = method_name,
            method = true,
            args = proc_tbl[2] or E,
            types = {
              userdata = true,
              table = true,
            },
            map_type = 'string',
            process = function(inp, t, args)
              if t == 'userdata' then
                -- Hand the parsed arguments over as positional arguments
                -- (not as one table); only the first result is used
                return inp[method_name](inp, lua_util.unpack(args or E)),'string'
              else
                -- Table
                return inp[method_name],'string'
              end
            end,
          }
          lua_util.debugm(M, cfg, 'attached method %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        else

          if not transform_function[proc_name] then
            logger.errx(cfg, 'processor %s is unknown', proc_name)
            pipeline_error = true
            return nil
          end
          local processor = lua_util.shallowcopy(transform_function[proc_name])
          processor.name = proc_name
          processor.args = proc_tbl[2] or E

          if not check_args(processor.name, processor.args_schema, processor.args) then
            pipeline_error = true
            return nil
          end

          lua_util.debugm(M, cfg, 'attached processor %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        end
      end, fun.tail(sel))

      if pipeline_error then
        logger.errx(cfg, 'unknown or invalid processor used, exiting')
        return nil
      end

      table.insert(output, res)
    end

    return output
  end
  --[[[
  -- @function lua_selectors.register_extractor(cfg, name, selector)
  -- Adds (or replaces) an extractor in the registry.
  -- @param {rspamd_config} cfg configuration object, used for logging
  -- @param {string} name extractor name as used in selector expressions
  -- @param {table} selector extractor definition; must have a `get_value` field
  -- @return {boolean} true when registered, false when `get_value` is missing
  --]]
  function exports.register_extractor(cfg, name, selector)
    if not selector.get_value then
      logger.errx(cfg, 'bad selector %s', name)
      return false
    end

    -- Overriding is allowed but reported
    if extractors[name] then
      logger.warnx(cfg, 'redefining selector %s', name)
    end

    extractors[name] = selector
    return true
  end
  --[[[
  -- @function lua_selectors.register_transform(cfg, name, transform)
  -- Adds (or replaces) a transform function in the registry.
  -- @param {rspamd_config} cfg configuration object, used for logging
  -- @param {string} name transform name as used in selector expressions
  -- @param {table} transform definition; must have `process` and `types` fields
  -- @return {boolean} true when registered, false when a required field is missing
  --]]
  function exports.register_transform(cfg, name, transform)
    local well_formed = transform.process and transform.types

    if not well_formed then
      logger.errx(cfg, 'bad transform function %s', name)
      return false
    end

    -- Overriding is allowed but reported
    if transform_function[name] then
      logger.warnx(cfg, 'redefining transform function %s', name)
    end

    transform_function[name] = transform
    return true
  end
  --[[[
  -- @function lua_selectors.process_selectors(task, selectors_pipe)
  -- Evaluates every parsed selector of `selectors_pipe` against the task.
  -- @param {rspamd_task} task task to extract data from
  -- @param {table} selectors_pipe list produced by `parse_selector`
  -- @return {table} list with one result per selector, in order; nil as soon
  --   as any selector yields no value
  --]]
  function exports.process_selectors(task, selectors_pipe)
    local results = {}

    for _,parsed_sel in ipairs(selectors_pipe) do
      local value = process_selector(task, parsed_sel)

      -- If any element is nil, then the whole selector is nil
      if not value then
        return nil
      end

      results[#results + 1] = value
    end

    return results
  end
  --[[[
  -- @function lua_selectors.combine_selectors(task, selectors, delimiter)
  -- Joins the results of several selectors using `delimiter`.
  -- @param task unused
  -- @param {table} selectors list of selector results (strings, userdata or lists)
  -- @param {string} delimiter separator, an empty string by default
  -- @return a single string when every result is a string, otherwise a list
  --   of strings built element-wise; nil when `selectors` is not given
  --]]
  function exports.combine_selectors(_, selectors, delimiter)
    delimiter = delimiter or ''

    if not selectors then
      return nil
    end

    local function is_string(s)
      return type(s) == 'string'
    end

    if fun.all(is_string, selectors) then
      return table.concat(selectors, delimiter)
    end

    -- We need to do a spill on each table selector
    -- e.g. s:tbl:s -> s:telt1:s + s:telt2:s ...
    -- Scalars are repeated endlessly so that they pair up with list elements
    local columns = {}

    for i,s in ipairs(selectors) do
      local kind = type(s)

      if kind == 'string' then
        columns[i] = fun.duplicate(s)
      elseif kind == 'userdata' then
        columns[i] = fun.duplicate(tostring(s))
      else
        columns[i] = s
      end
    end

    local combined = {}

    fun.each(function(...)
      combined[#combined + 1] = table.concat({...}, delimiter)
    end, fun.zip(lua_util.unpack(columns)))

    return combined
  end
  --[[[
  -- @function lua_selectors.create_selector_closure(cfg, selector_str, delimiter='')
  -- Parses `selector_str` once and returns a function that evaluates it.
  -- @param {rspamd_config} cfg configuration object, passed to `parse_selector`
  -- @param {string} selector_str selector expression
  -- @param {string} delimiter separator passed to `combine_selectors`
  -- @return {function} `function(task)` returning the combined selector
  --   value or nil; nil instead of a function if the expression is invalid
  --]]
  function exports.create_selector_closure(cfg, selector_str, delimiter)
    local parsed = exports.parse_selector(cfg, selector_str)

    if parsed then
      return function(task)
        local values = exports.process_selectors(task, parsed)

        if not values then
          return nil
        end

        return exports.combine_selectors(nil, values, delimiter)
      end
    end

    return nil
  end
  -- Produces a copy of a registry table (`extractors` or `transform_function`)
  -- in which every entry keeps only its non-function fields.
  local function display_selectors(tbl)
    local function is_plain_field(_, field_value)
      return type(field_value) ~= 'function'
    end

    local function strip_functions(name, definition)
      return name, fun.tomap(fun.filter(is_plain_field, definition))
    end

    return fun.tomap(fun.map(strip_functions, tbl))
  end
  -- Returns the registered extractors without their function fields
  function exports.list_extractors()
    return display_selectors(extractors)
  end
  -- Returns the registered transform functions without their function fields
  function exports.list_transforms()
    return display_selectors(transform_function)
  end
  914. return exports