-- lua_selectors.lua
  --[[
  Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
  ]]--
  -- This module contains the 'selectors' implementation: code to extract data
  -- from Rspamd tasks and compose the extracted values together.
  --
  -- Read more at https://rspamd.com/doc/configuration/selectors.html

  --[[[
  -- @module lua_selectors
  -- This module contains the 'selectors' implementation: code to extract data
  -- from Rspamd tasks and compose the extracted values together.
  -- A typical selector looks like this: header(User).lower.substring(1, 2);ip
  --]]
  -- Public API table; it is returned at the end of the module
  local exports = {}

  local logger = require 'rspamd_logger'
  local fun = require 'fun'
  local lua_util = require "lua_util"
  local ts = require("tableshape").types

  -- Module name passed to lua_util.debugm for debug output
  local M = "selectors"
  -- Shared empty table used as a nil-safe default (`x or E`); never write to it
  local E = {}
  -- Built-in extractors, keyed by the selector name used in selector strings.
  -- Each entry may define:
  --   get_value(task, args): returns `value, type_name`, or nil when nothing
  --     could be extracted
  --   description: human readable help text (kept by `list_extractors`)
  --   args_schema: tableshape schema(s) checked by `parse_selector`
  local extractors = {
    -- Returns its own first argument (or an empty string)
    ['id'] = {
      ['get_value'] = function(_, args)
        if args[1] then
          return args[1], 'string'
        end

        return '','string'
      end,
      ['description'] = [[Return value from function's argument or an empty string,
For example, `id('Something')` returns a string 'Something']],
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Get source IP address
    ['ip'] = {
      ['get_value'] = function(task)
        local ip = task:get_ip()
        if ip and ip:is_valid() then return ip,'userdata' end
        return nil
      end,
      ['description'] = [[Get source IP address]],
    },
    -- Get MIME from
    ['from'] = {
      ['get_value'] = function(task, args)
        local from = task:get_from(args[1] or 0)
        -- `E` keeps the chained lookup safe when `from` or `from[1]` is nil
        if ((from or E)[1] or E).addr then
          return from[1],'table'
        end
        return nil
      end,
      ['description'] = [[Get MIME or SMTP from (e.g. `from('smtp')` or `from('mime')`,
uses any type by default)]],
    },
    ['rcpts'] = {
      ['get_value'] = function(task, args)
        local rcpts = task:get_recipients(args[1] or 0)
        if ((rcpts or E)[1] or E).addr then
          return rcpts,'table_list'
        end
        return nil
      end,
      ['description'] = [[Get MIME or SMTP rcpts (e.g. `rcpts('smtp')` or `rcpts('mime')`,
uses any type by default)]],
    },
    -- Get country (ASN module must be executed first)
    ['country'] = {
      ['get_value'] = function(task)
        local country = task:get_mempool():get_variable('country')
        if not country then
          return nil
        else
          return country,'string'
        end
      end,
      ['description'] = [[Get country (ASN module must be executed first)]],
    },
    -- Get ASN number
    ['asn'] = {
      ['type'] = 'string',
      ['get_value'] = function(task)
        local asn = task:get_mempool():get_variable('asn')
        if not asn then
          return nil
        else
          return asn,'string'
        end
      end,
      ['description'] = [[Get AS number (ASN module must be executed first)]],
    },
    -- Get authenticated username
    ['user'] = {
      ['get_value'] = function(task)
        local auser = task:get_user()
        if not auser then
          return nil
        else
          return auser,'string'
        end
      end,
      ['description'] = 'Get authenticated user name',
    },
    -- Get principal recipient
    ['to'] = {
      ['get_value'] = function(task)
        return task:get_principal_recipient(),'string'
      end,
      ['description'] = 'Get principal recipient',
    },
    -- Get content digest
    ['digest'] = {
      ['get_value'] = function(task)
        return task:get_digest(),'string'
      end,
      ['description'] = 'Get content digest',
    },
    -- Get list of all attachments digests
    ['attachments'] = {
      ['get_value'] = function(task)
        local parts = task:get_parts() or E
        local digests = {}

        -- Only parts that carry a filename are treated as attachments
        for _,p in ipairs(parts) do
          if p:get_filename() then
            table.insert(digests, p:get_digest())
          end
        end

        if #digests > 0 then
          return digests,'string_list'
        end

        return nil
      end,
      ['description'] = 'Get list of all attachments digests',
    },
    -- Get all attachments files
    ['files'] = {
      ['get_value'] = function(task)
        local parts = task:get_parts() or E
        local files = {}

        for _,p in ipairs(parts) do
          local fname = p:get_filename()
          if fname then
            table.insert(files, fname)
          end
        end

        if #files > 0 then
          return files,'string_list'
        end

        return nil
      end,
      ['description'] = 'Get all attachments files',
    },
    -- Get helo value
    ['helo'] = {
      ['get_value'] = function(task)
        return task:get_helo(),'string'
      end,
      ['description'] = 'Get helo value',
    },
    -- Get header with the name that is expected as an argument. Returns list of
    -- headers with this name
    ['header'] = {
      ['get_value'] = function(task, args)
        local strong = false
        if args[2] then
          -- The flags argument is a single string that may contain both flags
          if args[2]:match('strong') then
            strong = true
          end

          if args[2]:match('full') then
            return task:get_header_full(args[1], strong),'table_list'
          end

          return task:get_header(args[1], strong),'string'
        else
          return task:get_header(args[1]),'string'
        end
      end,
      ['description'] = [[Get header with the name that is expected as an argument.
The optional second argument accepts list of flags:
- `full`: returns all headers with this name with all data (like task:get_header_full())
- `strong`: use case sensitive match when matching header's name]],
      ['args_schema'] = {ts.string,
                         (ts.pattern("strong") + ts.pattern("full")):is_optional()}
    },
    -- Get list of received headers (returns list of tables)
    ['received'] = {
      ['get_value'] = function(task, args)
        local rh = task:get_received_headers()
        if args[1] and rh then
          -- Project a single field out of every received header table
          return fun.map(function(r) return r[args[1]] end, rh), 'string_list'
        end

        return rh,'table_list'
      end,
      ['description'] = [[Get list of received headers.
If no arguments specified, returns list of tables. Otherwise, selects a specific element,
e.g. `by_hostname`]],
    },
    -- Get all urls
    ['urls'] = {
      ['get_value'] = function(task, args)
        local urls = task:get_urls()
        if args[1] and urls then
          -- Call the method named by the first argument on every url object
          return fun.map(function(r) return r[args[1]](r) end, urls), 'string_list'
        end
        return urls,'userdata_list'
      end,
      ['description'] = [[Get list of all urls.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_tld`]],
    },
    -- Get all emails
    ['emails'] = {
      ['get_value'] = function(task, args)
        local urls = task:get_emails()
        if args[1] and urls then
          return fun.map(function(r) return r[args[1]](r) end, urls), 'string_list'
        end
        return urls,'userdata_list'
      end,
      ['description'] = [[Get list of all emails.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_user`]],
    },
    -- Get specific pool var. The first argument must be variable name,
    -- the second argument is optional and defines the type (string by default)
    ['pool_var'] = {
      ['get_value'] = function(task, args)
        -- Named `var_type` so that the builtin `type` function is not shadowed
        local var_type = args[2] or 'string'
        -- Parentheses are not needed: `var_type` is a plain value
        return task:get_mempool():get_variable(args[1], var_type),var_type
      end,
      ['description'] = [[Get specific pool var. The first argument must be variable name,
the second argument is optional and defines the type (string by default)]],
      ['args_schema'] = {ts.string, ts.string:is_optional()}
    },
    -- Get specific HTTP request header. The first argument must be header name.
    ['request_header'] = {
      ['get_value'] = function(task, args)
        local hdr = task:get_request_header(args[1])
        if hdr then
          return tostring(hdr),'string'
        end

        return nil
      end,
      ['description'] = [[Get specific HTTP request header.
The first argument must be header name.]],
      ['args_schema'] = {ts.string}
    },
    -- Get task date, optionally formatted
    ['time'] = {
      ['get_value'] = function(task, args)
        -- The `Date` header timestamp is used when no type is given
        local what = args[1] or 'message'
        local dt = task:get_date{format = what, gmt = true}

        if dt then
          if args[2] then
            -- Should be in format !xxx, as dt is in GMT
            return os.date(args[2], dt),'string'
          end

          return tostring(dt),'string'
        end

        return nil
      end,
      ['description'] = [[Get task timestamp. The first argument is type:
- `connect`: connection timestamp
- `message`: timestamp as defined by `Date` header (default)
The second argument is optional time format, see [os.date](http://pgl.yoyo.org/luai/i/os.date) description]],
      ['args_schema'] = {ts.one_of{'connect', 'message'}:is_optional(),
                         ts.string:is_optional()}
    }
  }
  -- Returns the element type of a list type name, e.g. 'string_list' -> 'string'.
  -- Returns nil when `ltype` does not end with '_list' or is not a string at all
  -- (a non-string type tag would otherwise raise inside `:match`).
  local function pure_type(ltype)
    if type(ltype) ~= 'string' then
      return nil
    end

    return ltype:match('^(.*)_list$')
  end
  -- Built-in transform functions (pipeline processors), keyed by name.
  -- Each entry may define:
  --   types: set of accepted input types; the pseudo type 'list' accepts any
  --     `*_list` type as a whole
  --   map_type: element type produced when the transform is mapped over a list
  --   process(inp, t, args): returns `value, type_name` (or nil to stop the pipe)
  --   description: human readable help text (kept by `list_transforms`)
  --   args_schema: tableshape schema(s) checked by `parse_selector`
  local transform_function = {
    -- Returns the lowercased string
    ['lower'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _)
        return inp:lower(),'string'
      end,
      ['description'] = 'Returns the lowercased string',
    },
    -- Returns the first element
    ['first'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t)
        -- fun.nth yields nil for an empty list, whereas fun.head raises an error
        return fun.nth(1, inp),pure_type(t)
      end,
      ['description'] = 'Returns the first element',
    },
    -- Returns the last element
    ['last'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t)
        -- `#inp` is only meaningful for plain arrays; fun.length also counts
        -- the elements of functional iterators (e.g. results of fun.map)
        local len = fun.length(inp)
        if len == 0 then
          return nil
        end

        return fun.nth(len, inp),pure_type(t)
      end,
      ['description'] = 'Returns the last element',
    },
    -- Returns the nth element
    ['nth'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.nth(args[1] or 1, inp),pure_type(t)
      end,
      ['description'] = 'Returns the nth element',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Returns the n first elements
    ['take_n'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.take_n(args[1] or 1, inp),t
      end,
      ['description'] = 'Returns the n first elements',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Returns list without the first n elements
    ['drop_n'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.drop_n(args[1] or 1, inp),t
      end,
      ['description'] = 'Returns list without the first n elements',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Joins strings into a single string using separator in the argument
    ['join'] = {
      ['types'] = {
        ['string_list'] = true
      },
      ['process'] = function(inp, _, args)
        -- `inp` may be a functional iterator, hence the fun.totable call
        return table.concat(fun.totable(inp), args[1] or ''), 'string'
      end,
      ['description'] = 'Joins strings into a single string using separator in the argument',
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Create a digest from string or a list of strings
    ['digest'] = {
      ['types'] = {
        ['string'] = true
      },
      -- `process` returns strings, so a mapped list must be a 'string_list';
      -- otherwise later transforms such as `join` cannot be chained
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local hash = require 'rspamd_cryptobox_hash'
        local encoding = args[1] or 'hex'
        local ht = args[2] or 'blake2'
        -- create_specific is a module function: it must be called with `.`,
        -- so that the hash type is its first argument
        local h = hash.create_specific(ht):update(inp)
        local s

        if encoding == 'hex' then
          s = h:hex()
        elseif encoding == 'base32' then
          s = h:base32()
        elseif encoding == 'base64' then
          s = h:base64()
        end

        return s,'string'
      end,
      ['description'] = [[Create a digest from a string.
The first argument is encoding (`hex`, `base32`, `base64`),
the second argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
      ['args_schema'] = {ts.one_of{'hex', 'base32', 'base64'}:is_optional(),
                         ts.one_of{'blake2', 'sha256', 'sha1', 'sha512', 'md5'}:is_optional()}
    },
    -- Extracts substring
    ['substring'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local start_pos = args[1] or 1
        local end_pos = args[2] or -1

        return inp:sub(start_pos, end_pos), 'string'
      end,
      ['description'] = 'Extracts substring',
      ['args_schema'] = {(ts.number + ts.string / tonumber):is_optional(),
                         (ts.number + ts.string / tonumber):is_optional()}
    },
    -- Regexp matching
    ['regexp'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local rspamd_regexp = require "rspamd_regexp"

        local re = rspamd_regexp.create_cached(args[1])

        if not re then
          logger.errx('invalid regexp: %s', args[1])
          return nil
        end

        local res = re:search(inp, false, true)

        if res then
          -- A single result is returned as a scalar rather than a list
          if #res == 1 then
            return res[1],'string'
          end

          return res,'string_list'
        end

        return nil
      end,
      ['description'] = 'Regexp matching',
      ['args_schema'] = {ts.string}
    },
    -- Drops input value and return values from function's arguments or an empty string
    ['id'] = {
      ['types'] = {
        ['string'] = true,
        ['list'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(_, _, args)
        if args[1] and args[2] then
          return fun.map(tostring, args),'string_list'
        elseif args[1] then
          return args[1],'string'
        end

        return '','string'
      end,
      ['description'] = 'Drops input value and return values from function\'s arguments or an empty string',
      ['args_schema'] = (ts.string + ts.array_of(ts.string)):is_optional()
    },
    -- Boolean function in, returns either nil or its input if input is in args list
    ['in'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, t, args)
        for _,a in ipairs(args) do
          if a == inp then return inp,t end
        end
        return nil
      end,
      ['description'] = [[Boolean function in.
Returns either nil or its input if input is in args list]],
      ['args_schema'] = ts.array_of(ts.string)
    },
    -- Boolean function not_in, the inverse of `in`
    ['not_in'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, t, args)
        for _,a in ipairs(args) do
          if a == inp then return nil end
        end
        return inp,t
      end,
      ['description'] = [[Boolean function not in.
Returns either nil or its input if input is not in args list]],
      ['args_schema'] = ts.array_of(ts.string)
    },
  }
  -- Evaluates one parsed selector (as produced by `parse_selector`) on a task.
  --
  -- Steps:
  --   1. call the extractor to obtain the initial value and its type;
  --   2. for userdata/table values, apply the explicit `:method` (if it is the
  --      first pipeline element) or an implicit conversion to string(s);
  --   3. left-fold the remaining processors over the value;
  --   4. coerce the result to a string or a plain table of strings.
  --
  -- @param task rspamd task object
  -- @param sel table with `selector` and optional `processor_pipe` fields
  -- @return string, table of strings, or nil when extraction or the pipe failed
  local function process_selector(task, sel)
    -- Types that may be returned to the caller without further conversion
    local function allowed_type(t)
      return t == 'string' or t == 'text' or
          t == 'string_list' or t == 'text_list'
    end

    -- Converts a single table or userdata element to a string
    local function implicit_tostring(t, ud_or_table)
      if t == 'table' then
        -- Table (very special)
        if ud_or_table.value then
          return ud_or_table.value,'string'
        elseif ud_or_table.addr then
          return ud_or_table.addr,'string'
        end

        return logger.slog("%s", ud_or_table),'string'
      else
        return tostring(ud_or_table),'string'
      end
    end

    local input,etype = sel.selector.get_value(task, sel.selector.args)

    if not input then
      lua_util.debugm(M, task, 'no value extracted for %s', sel.selector.name)
      return nil
    end

    lua_util.debugm(M, task, 'extracted %s, type %s',
        sel.selector.name, etype)

    local pipe = sel.processor_pipe or E

    if etype:match('^userdata') or etype:match('^table') then
      -- Apply userdata conversion first
      local first_elt = pipe[1]

      if first_elt and first_elt.method then
        -- Explicit conversion
        local meth = first_elt

        if meth.types[etype] then
          lua_util.debugm(M, task, 'apply method `%s` to %s',
              meth.name, etype)
          -- The method arguments parsed from the selector must be forwarded
          input,etype = meth.process(input, etype, meth.args)
        else
          local pt = pure_type(etype)

          if meth.types[pt] then
            lua_util.debugm(M, task, 'map method `%s` to list of %s',
                meth.name, pt)
            input = fun.map(function(list_elt)
              local ret, _ = meth.process(list_elt, pt, meth.args)
              return ret
            end, input)
            etype = 'string_list'
          end
        end

        -- Remove method from the pipeline
        pipe = fun.drop_n(1, pipe)
      else
        -- Implicit conversion
        local pt = pure_type(etype)

        if not pt then
          lua_util.debugm(M, task, 'apply implicit conversion %s->string', etype)
          input = implicit_tostring(etype, input)
          etype = 'string'
        else
          lua_util.debugm(M, task, 'apply implicit map %s->string', pt)
          input = fun.map(function(list_elt)
            local ret = implicit_tostring(pt, list_elt)
            return ret
          end, input)
          etype = 'string_list'
        end
      end
    end

    -- Now we fold elements using left fold.
    -- The accumulator is a `{value, type}` pair; nil means the pipe has failed
    local function fold_function(acc, x)
      if acc == nil or acc[1] == nil then
        lua_util.debugm(M, task, 'do not apply %s, accumulator is nil', x.name)
        return nil
      end

      local value = acc[1]
      local t = acc[2]

      if not x.types[t] then
        local pt = pure_type(t)

        if pt and x.types['list'] then
          -- Generic list processor
          lua_util.debugm(M, task, 'apply list function `%s` to %s', x.name, t)
          return {x.process(value, t, x.args)}
        elseif pt and x.map_type and x.types[pt] then
          -- Scalar processor applied to every element of the list
          local map_type = x.map_type .. '_list'
          lua_util.debugm(M, task, 'map `%s` to list of %s resulting %s',
              x.name, pt, map_type)

          return {fun.map(function(list_elt)
            if not list_elt then return nil end
            local ret, _ = x.process(list_elt, pt, x.args)
            return ret
          end, value), map_type}
        end

        logger.errx(task, 'cannot apply transform %s for type %s', x.name, t)
        return nil
      end

      lua_util.debugm(M, task, 'apply %s to %s', x.name, t)
      return {x.process(value, t, x.args)}
    end

    local res = fun.foldl(fold_function,
        {input, etype},
        pipe)

    if not res or not res[1] then return nil end -- Pipeline failed

    if not allowed_type(res[2]) then
      -- Search for implicit conversion
      local pt = pure_type(res[2])

      if pt then
        lua_util.debugm(M, task, 'apply implicit map %s->string_list', pt)
        res[1] = fun.map(function(e)
          -- Parentheses keep only the value and drop the type name
          return (implicit_tostring(pt, e))
        end, res[1])
        res[2] = 'string_list'
      else
        res[1] = implicit_tostring(res[2], res[1])
        res[2] = 'string'
      end
    end

    if pure_type(res[2]) then
      -- Convert to table as it might have a functional form
      res[1] = fun.totable(res[1])
    end

    lua_util.debugm(M, task, 'final selector type: %s, value: %s', res[2], res[1])

    return res[1]
  end
  -- Builds the LPeg grammar used to parse selector strings.
  --
  -- A selector string is a list of expressions separated by `;` or `*`.
  -- Each expression is `function(args)` optionally followed by one `:method`
  -- and by any number of `.processor(args)` elements. Every element is captured
  -- as a table `{name, {arg1, arg2, ...}}`; method names get a `__` prefix so
  -- that the parser's consumer can tell them apart from processors.
  local function make_grammar()
    local l = require "lpeg"
    local spc = l.S(" \t\n")^0
    -- Bare word: letters, digits, `_` and `-`
    local atom = l.C((l.R("az") + l.R("AZ") + l.R("09") + l.S("_-"))^1)
    -- Quoted strings; a backslash escapes the following character
    local singlequoted_string = l.P "'" * l.C(((1 - l.S "'\r\n\f\\") + (l.P'\\' * 1))^0) * "'"
    local doublequoted_string = l.P '"' * l.C(((1 - l.S'"\r\n\f\\') + (l.P'\\' * 1))^0) * '"'
    local argument = atom + singlequoted_string + doublequoted_string
    local dot = l.P(".")
    -- `:` introduces a method call on the extracted value
    local colon = l.P(":")
    local obrace = "(" * spc
    local ebrace = spc * ")"
    local comma = spc * "," * spc
    local sel_separator = spc * l.S";*" * spc

    return l.P{
      "LIST";
      LIST = l.Ct(l.V("EXPR")) * (sel_separator * l.Ct(l.V("EXPR")))^0,
      EXPR = l.V("FUNCTION") * (colon * l.V("METHOD"))^-1 * (dot * l.V("PROCESSOR"))^0,
      PROCESSOR = l.Ct(atom * spc * (obrace * l.V("ARG_LIST") * ebrace)^0),
      FUNCTION = l.Ct(atom * spc * (obrace * l.V("ARG_LIST") * ebrace)^0),
      METHOD = l.Ct(atom / function(e) return '__' .. e end * spc * (obrace * l.V("ARG_LIST") * ebrace)^0),
      ARG_LIST = l.Ct((argument * comma^0)^0)
    }
  end

  -- The grammar is compiled once, when the module is loaded
  local parser = make_grammar()
  --[[[
  -- @function lua_selectors.parse_selector(cfg, str)
  -- Parses a selector string into a list of selector descriptions.
  -- @param cfg object passed to the logger functions as the logging context
  -- @param {string} str selector string
  -- @return list of tables `{selector = ..., processor_pipe = {...}}`, or nil
  -- when the string cannot be parsed, a name is unknown or arguments are invalid
  --]]
  exports.parse_selector = function(cfg, str)
    local parsed = {parser:match(str)}
    local output = {}

    if not parsed or not parsed[1] then return nil end

    -- Validates `args` against `schema` and stores the transformed values back
    -- into `args`. Returns false (after logging) when validation fails.
    local function check_args(name, schema, args)
      if not schema then
        return true
      end

      if getmetatable(schema) then
        -- Schema covers all arguments
        local res,err = schema:transform(args)
        if not res then
          logger.errx(cfg, 'invalid arguments for %s: %s', name, err)
          return false
        end

        for i,elt in ipairs(res) do
          args[i] = elt
        end
      else
        -- Plain list of schemas: one schema per positional argument
        for i,selt in ipairs(schema) do
          local res,err = selt:transform(args[i])

          if err then
            logger.errx(cfg, 'invalid arguments for %s: %s', name, err)
            return false
          end

          args[i] = res
        end
      end

      return true
    end

    -- Output AST format is the following:
    -- table of individual selectors
    -- each selector: list of functions
    -- each function: function name + optional list of arguments
    for _,sel in ipairs(parsed) do
      local res = {
        selector = {},
        processor_pipe = {},
      }

      local selector_tbl = sel[1]
      if not selector_tbl then
        logger.errx(cfg, 'no selector represented')
        return nil
      end
      if not extractors[selector_tbl[1]] then
        logger.errx(cfg, 'selector %s is unknown', selector_tbl[1])
        return nil
      end

      res.selector = lua_util.shallowcopy(extractors[selector_tbl[1]])
      res.selector.name = selector_tbl[1]
      -- A fresh table is used because check_args writes into `args`, and the
      -- shared empty sentinel must stay untouched
      res.selector.args = selector_tbl[2] or {}

      if not check_args(res.selector.name,
          res.selector.args_schema,
          res.selector.args) then
        return nil
      end

      lua_util.debugm(M, cfg, 'processed selector %s, args: %s',
          res.selector.name, res.selector.args)

      local pipeline_error = false
      -- Now process processors pipe
      fun.each(function(proc_tbl)
        local proc_name = proc_tbl[1]

        if proc_name:match('^__') then
          -- Special case - method
          local method_name = proc_name:match('^__(.*)$')
          local processor = {
            name = method_name,
            method = true,
            args = proc_tbl[2] or {},
            types = {
              userdata = true,
              table = true,
            },
            map_type = 'string',
            process = function(inp, t, args)
              local member = inp[method_name]

              if t == 'userdata' then
                if type(member) ~= 'function' then
                  -- Unknown method: fail this selector instead of raising
                  return nil
                end
                -- Arguments are unpacked into positional parameters; the
                -- parentheses keep only the first result of the call
                return (member(inp, lua_util.unpack(args or E))),'string'
              else
                -- Table: plain field lookup
                if member == nil then
                  return nil
                end
                return member,'string'
              end
            end,
          }
          lua_util.debugm(M, cfg, 'attached method %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        else

          if not transform_function[proc_name] then
            logger.errx(cfg, 'processor %s is unknown', proc_name)
            pipeline_error = true
            return nil
          end
          local processor = lua_util.shallowcopy(transform_function[proc_name])
          processor.name = proc_name
          processor.args = proc_tbl[2] or {}

          if not check_args(processor.name, processor.args_schema, processor.args) then
            pipeline_error = true
            return nil
          end

          lua_util.debugm(M, cfg, 'attached processor %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        end
      end, fun.tail(sel))

      if pipeline_error then
        logger.errx(cfg, 'unknown or invalid processor used, exiting')
        return nil
      end

      table.insert(output, res)
    end

    return output
  end
  --[[[
  -- @function lua_selectors.register_extractor(cfg, name, selector)
  -- Registers (or redefines) an extractor under the given name.
  -- @param cfg object passed to the logger functions as the logging context
  -- @param {string} name extractor name
  -- @param {table} selector extractor definition; must have a `get_value` field
  -- @return true on success, false when `selector` is not a valid definition
  --]]
  exports.register_extractor = function(cfg, name, selector)
    -- The type check avoids indexing nil or a scalar passed by mistake
    if type(selector) == 'table' and selector.get_value then
      if extractors[name] then
        logger.warnx(cfg, 'redefining selector %s', name)
      end
      extractors[name] = selector

      return true
    end

    logger.errx(cfg, 'bad selector %s', name)
    return false
  end
  --[[[
  -- @function lua_selectors.register_transform(cfg, name, transform)
  -- Registers (or redefines) a transform function under the given name.
  -- @param cfg object passed to the logger functions as the logging context
  -- @param {string} name transform name
  -- @param {table} transform definition; must have `process` and `types` fields
  -- @return true on success, false when `transform` is not a valid definition
  --]]
  exports.register_transform = function(cfg, name, transform)
    -- The type check avoids indexing nil or a scalar passed by mistake
    if type(transform) == 'table' and transform.process and transform.types then
      if transform_function[name] then
        logger.warnx(cfg, 'redefining transform function %s', name)
      end
      transform_function[name] = transform

      return true
    end

    logger.errx(cfg, 'bad transform function %s', name)
    return false
  end
  --[[[
  -- @function lua_selectors.process_selectors(task, selectors_pipe)
  -- Evaluates every selector of a parsed selector list on a task.
  -- @param task rspamd task object
  -- @param {table} selectors_pipe list returned by `parse_selector`
  -- @return list with one result per selector, or nil if any selector
  -- produced no value
  --]]
  exports.process_selectors = function(task, selectors_pipe)
    local ret = {}

    for _,sel in ipairs(selectors_pipe) do
      local r = process_selector(task, sel)

      -- If any element is nil, then the whole selector is nil
      if not r then return nil end

      ret[#ret + 1] = r
    end

    return ret
  end
  --[[[
  -- @function lua_selectors.combine_selectors(task, selectors, delimiter)
  -- Combines the results of `process_selectors` into string(s).
  -- @param _ unused (kept for interface compatibility)
  -- @param {table} selectors list of scalar values and/or lists of values
  -- @param {string} delimiter separator placed between values (default: '')
  -- @return a single string when no element is a list; otherwise a list of
  -- strings where every scalar is repeated for each list position; nil when
  -- `selectors` is nil
  --]]
  exports.combine_selectors = function(_, selectors, delimiter)
    if not delimiter then delimiter = '' end

    if not selectors then return nil end

    -- Stringify scalars and find out whether any element is a list
    local scalars = {}
    local have_lists = false

    for i,s in ipairs(selectors) do
      if type(s) == 'table' then
        have_lists = true
      else
        scalars[i] = tostring(s)
      end
    end

    if not have_lists then
      -- Only scalars: zipping would never terminate here, because every
      -- column would be an infinite `duplicate` iterator
      return table.concat(scalars, delimiter)
    end

    -- We need to do a spill on each table selector
    -- e.g. s:tbl:s -> s:telt1:s + s:telt2:s ...
    local columns = {}
    local res = {}

    for i,s in ipairs(selectors) do
      if type(s) == 'table' then
        columns[i] = s
      else
        columns[i] = fun.duplicate(scalars[i])
      end
    end

    fun.each(function(...)
      res[#res + 1] = table.concat({...}, delimiter)
    end, fun.zip(lua_util.unpack(columns)))

    return res
  end
  --[[[
  -- @function lua_selectors.create_selector_closure(cfg, selector_str, delimiter='')
  -- Parses a selector string once and returns a function that evaluates it.
  -- @param cfg object passed to the logger functions as the logging context
  -- @param {string} selector_str selector string
  -- @param {string} delimiter separator used when combining the results
  -- @return function(task) that returns the combined selector value (or nil),
  -- or nil when the selector string cannot be parsed
  --]]
  exports.create_selector_closure = function(cfg, selector_str, delimiter)
    local selector = exports.parse_selector(cfg, selector_str)

    if not selector then
      return nil
    end

    return function(task)
      local res = exports.process_selectors(task, selector)

      if res then
        return exports.combine_selectors(nil, res, delimiter)
      end

      return nil
    end
  end
  -- Returns a copy of a registry table (extractors or transforms) in which
  -- every entry keeps only its non-function fields
  local function display_selectors(tbl)
    return fun.tomap(fun.map(function(k,v)
      return k, fun.tomap(fun.filter(function(_, vv)
        return type(vv) ~= 'function'
      end, v))
    end, tbl))
  end
  -- Returns the registered extractors without their function fields
  exports.list_extractors = function()
    return display_selectors(extractors)
  end
  -- Returns the registered transform functions without their function fields
  exports.list_transforms = function()
    return display_selectors(transform_function)
  end

  return exports