You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_selectors.lua 27KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968
  1. --[[
  2. Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. -- This module contains 'selectors' implementation: code to extract data
  14. -- from Rspamd tasks and compose those together
  15. --
  16. -- Read more at https://rspamd.com/doc/configuration/selectors.html
  17. --[[[
  18. -- @module lua_selectors
  19. -- This module contains 'selectors' implementation: code to extract data
  20. -- from Rspamd tasks and compose those together.
  21. -- Typical selector looks like this: header(User).lower.substring(1, 2):ip
  22. --]]
  23. local exports = {}
  24. local logger = require 'rspamd_logger'
  25. local fun = require 'fun'
  26. local lua_util = require "lua_util"
  27. local ts = require("tableshape").types
  28. local M = "selectors"
  29. local E = {}
  -- Registry of data extractors, keyed by the name used in a selector string.
  -- Each entry holds:
  --   * get_value(task, args): returns `value, type`; a nil value means that
  --     nothing could be extracted
  --   * description: human readable help text
  --   * args_schema (optional): list of tableshape checkers, one per positional
  --     argument
  local extractors = {
    -- Return the first argument verbatim (or an empty string when it is absent)
    ['id'] = {
      ['get_value'] = function(_, args)
        if args[1] then
          return args[1], 'string'
        end

        return '','string'
      end,
      ['description'] = [[Return value from function's argument or an empty string,
For example, `id('Something')` returns a string 'Something']],
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Get source IP address
    ['ip'] = {
      ['get_value'] = function(task)
        local ip = task:get_ip()
        if ip and ip:is_valid() then return ip,'userdata' end
        return nil
      end,
      ['description'] = [[Get source IP address]],
    },
    -- Get MIME from
    ['from'] = {
      ['get_value'] = function(task, args)
        local from = task:get_from(args[1] or 0)
        -- `E` stands in for missing tables so the chained lookup never indexes nil
        if ((from or E)[1] or E).addr then
          return from[1],'table'
        end
        return nil
      end,
      ['description'] = [[Get MIME or SMTP from (e.g. `from('smtp')` or `from('mime')`,
uses any type by default)]],
    },
    -- Get the list of recipients
    ['rcpts'] = {
      ['get_value'] = function(task, args)
        local rcpts = task:get_recipients(args[1] or 0)
        if ((rcpts or E)[1] or E).addr then
          return rcpts,'table_list'
        end
        return nil
      end,
      ['description'] = [[Get MIME or SMTP rcpts (e.g. `rcpts('smtp')` or `rcpts('mime')`,
uses any type by default)]],
    },
    -- Get country (ASN module must be executed first)
    ['country'] = {
      ['get_value'] = function(task)
        local country = task:get_mempool():get_variable('country')
        if not country then
          return nil
        else
          return country,'string'
        end
      end,
      ['description'] = [[Get country (ASN module must be executed first)]],
    },
    -- Get ASN number
    ['asn'] = {
      ['type'] = 'string',
      ['get_value'] = function(task)
        local asn = task:get_mempool():get_variable('asn')
        if not asn then
          return nil
        else
          return asn,'string'
        end
      end,
      ['description'] = [[Get AS number (ASN module must be executed first)]],
    },
    -- Get authenticated username
    ['user'] = {
      ['get_value'] = function(task)
        local auser = task:get_user()
        if not auser then
          return nil
        else
          return auser,'string'
        end
      end,
      ['description'] = 'Get authenticated user name',
    },
    -- Get principal recipient
    ['to'] = {
      ['get_value'] = function(task)
        return task:get_principal_recipient(),'string'
      end,
      ['description'] = 'Get principal recipient',
    },
    -- Get content digest
    ['digest'] = {
      ['get_value'] = function(task)
        return task:get_digest(),'string'
      end,
      ['description'] = 'Get content digest',
    },
    -- Get list of all attachments digests
    ['attachments'] = {
      ['get_value'] = function(task, args)
        local s
        local parts = task:get_parts() or E
        local digests = {}

        if #args > 0 then
          -- A custom encoding and/or hash type was requested, so the digest is
          -- computed here instead of using the one stored in the part
          local rspamd_cryptobox = require "rspamd_cryptobox_hash"
          local encoding = args[1] or 'hex'
          local ht = args[2] or 'blake2'

          for _,p in ipairs(parts) do
            -- Only parts that carry a filename are treated as attachments
            if p:get_filename() then
              local h = rspamd_cryptobox.create_specific(ht, p:get_content('raw_parsed'))
              if encoding == 'hex' then
                s = h:hex()
              elseif encoding == 'base32' then
                s = h:base32()
              elseif encoding == 'base64' then
                s = h:base64()
              end
              table.insert(digests, s)
            end
          end
        else
          for _,p in ipairs(parts) do
            if p:get_filename() then
              table.insert(digests, p:get_digest())
            end
          end
        end

        if #digests > 0 then
          return digests,'string_list'
        end

        return nil
      end,
      ['description'] = [[Get list of all attachments digests.
The first optional argument is encoding (`hex`, `base32`, `base64`),
the second optional argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
      ['args_schema'] = {ts.one_of{'hex', 'base32', 'base64'}:is_optional(),
                         ts.one_of{'blake2', 'sha256', 'sha1', 'sha512', 'md5'}:is_optional()}
    },
    -- Get all attachments files
    ['files'] = {
      ['get_value'] = function(task)
        local parts = task:get_parts() or E
        local files = {}

        for _,p in ipairs(parts) do
          local fname = p:get_filename()
          if fname then
            table.insert(files, fname)
          end
        end

        if #files > 0 then
          return files,'string_list'
        end

        return nil
      end,
      ['description'] = 'Get all attachments files',
    },
    -- Get helo value
    ['helo'] = {
      ['get_value'] = function(task)
        return task:get_helo(),'string'
      end,
      ['description'] = 'Get helo value',
    },
    -- Get header with the name that is expected as an argument. Returns list of
    -- headers with this name
    ['header'] = {
      ['get_value'] = function(task, args)
        local strong = false
        if args[2] then
          -- The second argument is a flags string that may contain both flags
          if args[2]:match('strong') then
            strong = true
          end

          if args[2]:match('full') then
            return task:get_header_full(args[1], strong),'table_list'
          end

          return task:get_header(args[1], strong),'string'
        else
          return task:get_header(args[1]),'string'
        end
      end,
      ['description'] = [[Get header with the name that is expected as an argument.
The optional second argument accepts list of flags:
- `full`: returns all headers with this name with all data (like task:get_header_full())
- `strong`: use case sensitive match when matching header's name]],
      ['args_schema'] = {ts.string,
                         (ts.pattern("strong") + ts.pattern("full")):is_optional()}
    },
    -- Get list of received headers (returns list of tables)
    ['received'] = {
      ['get_value'] = function(task, args)
        local rh = task:get_received_headers()
        if args[1] and rh then
          -- Pick a single field out of every received header
          return fun.map(function(r) return r[args[1]] end, rh), 'string_list'
        end
        return rh,'table_list'
      end,
      ['description'] = [[Get list of received headers.
If no arguments specified, returns list of tables. Otherwise, selects a specific element,
e.g. `by_hostname`]],
    },
    -- Get all urls
    ['urls'] = {
      ['get_value'] = function(task, args)
        local urls = task:get_urls()
        if args[1] and urls then
          -- The argument names a method that is invoked on every url object
          return fun.map(function(r) return r[args[1]](r) end, urls), 'string_list'
        end
        return urls,'userdata_list'
      end,
      ['description'] = [[Get list of all urls.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_tld`]],
    },
    -- Get all emails
    ['emails'] = {
      ['get_value'] = function(task, args)
        local urls = task:get_emails()
        if args[1] and urls then
          -- The argument names a method that is invoked on every email object
          return fun.map(function(r) return r[args[1]](r) end, urls), 'string_list'
        end
        return urls,'userdata_list'
      end,
      ['description'] = [[Get list of all emails.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_user`]],
    },
    -- Get specific pool var. The first argument must be variable name,
    -- the second argument is optional and defines the type (string by default)
    ['pool_var'] = {
      ['get_value'] = function(task, args)
        -- Named `var_type` so that the builtin `type` function is not shadowed
        local var_type = args[2] or 'string'
        return task:get_mempool():get_variable(args[1], var_type),var_type
      end,
      ['description'] = [[Get specific pool var. The first argument must be variable name,
the second argument is optional and defines the type (string by default)]],
      ['args_schema'] = {ts.string, ts.string:is_optional()}
    },
    -- Get specific HTTP request header. The first argument must be header name.
    ['request_header'] = {
      ['get_value'] = function(task, args)
        local hdr = task:get_request_header(args[1])
        if hdr then
          return tostring(hdr),'string'
        end

        return nil
      end,
      ['description'] = [[Get specific HTTP request header.
The first argument must be header name.]],
      ['args_schema'] = {ts.string}
    },
    -- Get task date, optionally formatted
    ['time'] = {
      ['get_value'] = function(task, args)
        -- `message` is the effective default; the description below states the same
        local what = args[1] or 'message'
        local dt = task:get_date{format = what, gmt = true}

        if dt then
          if args[2] then
            -- Should be in format !xxx, as dt is in GMT
            return os.date(args[2], dt),'string'
          end

          return tostring(dt),'string'
        end

        return nil
      end,
      ['description'] = [[Get task timestamp. The first argument is type:
- `connect`: connection timestamp
- `message`: timestamp as defined by `Date` header (default)
The second argument is optional time format, see [os.date](http://pgl.yoyo.org/luai/i/os.date) description]],
      ['args_schema'] = {ts.one_of{'connect', 'message'}:is_optional(),
                         ts.string:is_optional()}
    }
  }
  -- Returns the element type of a list type name (`string_list` -> `string`),
  -- or nil when the name does not end with `_list`
  local function pure_type(ltype)
    local elt_type = ltype:match('^(.*)_list$')
    return elt_type
  end
  -- Registry of transform functions (processors), keyed by the name used after
  -- a dot in a selector string. Each entry holds:
  --   * types: set of accepted input types; the pseudo type `list` accepts any
  --     `*_list` input as a whole
  --   * map_type (optional): element type of the result when the transform is
  --     mapped over the elements of a list
  --   * process(inp, t, args): returns `value, type`
  --   * description: human readable help text
  --   * args_schema (optional): tableshape checker(s) for the arguments
  local transform_function = {
    -- Returns the lowercased string
    ['lower'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _)
        return inp:lower(),'string'
      end,
      ['description'] = 'Returns the lowercased string',
    },
    -- Returns the first element
    ['first'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t)
        return fun.head(inp),pure_type(t)
      end,
      ['description'] = 'Returns the first element',
    },
    -- Returns the last element
    ['last'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t)
        -- `inp` can be a lazy `fun` iterator (e.g. after `take_n` or a mapped
        -- processor) and not only a plain table, so `#inp` cannot be relied on
        local len = fun.length(inp)
        if len == 0 then
          -- An empty list has no last element
          return nil
        end

        return fun.nth(len, inp),pure_type(t)
      end,
      ['description'] = 'Returns the last element',
    },
    -- Returns the nth element
    ['nth'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.nth(args[1] or 1, inp),pure_type(t)
      end,
      ['description'] = 'Returns the nth element',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Returns the n first elements
    ['take_n'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.take_n(args[1] or 1, inp),t
      end,
      ['description'] = 'Returns the n first elements',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Returns list without the first n elements
    ['drop_n'] = {
      ['types'] = {
        ['list'] = true,
      },
      ['process'] = function(inp, t, args)
        return fun.drop_n(args[1] or 1, inp),t
      end,
      ['description'] = 'Returns list without the first n elements',
      ['args_schema'] = {ts.number + ts.string / tonumber}
    },
    -- Joins strings into a single string using separator in the argument
    ['join'] = {
      ['types'] = {
        ['string_list'] = true
      },
      ['process'] = function(inp, _, args)
        -- The input may be an iterator, hence the conversion to a table first
        return table.concat(fun.totable(inp), args[1] or ''), 'string'
      end,
      ['description'] = 'Joins strings into a single string using separator in the argument',
      ['args_schema'] = {ts.string:is_optional()}
    },
    -- Create a digest from string or a list of strings
    ['digest'] = {
      ['types'] = {
        ['string'] = true
      },
      -- `process` yields strings, so a mapped result has to be a `string_list`
      -- to stay usable by the following string processors (e.g. `join`)
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local hash = require 'rspamd_cryptobox_hash'
        local encoding = args[1] or 'hex'
        local ht = args[2] or 'blake2'
        -- Plain function call (dot), the same way the `attachments` extractor
        -- invokes it: the hash type must be the first argument
        local h = hash.create_specific(ht):update(inp)
        local s

        if encoding == 'hex' then
          s = h:hex()
        elseif encoding == 'base32' then
          s = h:base32()
        elseif encoding == 'base64' then
          s = h:base64()
        end

        return s,'string'
      end,
      ['description'] = [[Create a digest from a string.
The first argument is encoding (`hex`, `base32`, `base64`),
the second argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
      ['args_schema'] = {ts.one_of{'hex', 'base32', 'base64'}:is_optional(),
                         ts.one_of{'blake2', 'sha256', 'sha1', 'sha512', 'md5'}:is_optional()}
    },
    -- Extracts substring
    ['substring'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local start_pos = args[1] or 1
        local end_pos = args[2] or -1

        return inp:sub(start_pos, end_pos), 'string'
      end,
      ['description'] = 'Extracts substring',
      ['args_schema'] = {(ts.number + ts.string / tonumber):is_optional(),
                         (ts.number + ts.string / tonumber):is_optional()}
    },
    -- Regexp matching
    ['regexp'] = {
      ['types'] = {
        ['string'] = true
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, _, args)
        local rspamd_regexp = require "rspamd_regexp"

        local re = rspamd_regexp.create_cached(args[1])

        if not re then
          logger.errx('invalid regexp: %s', args[1])
          return nil
        end

        local res = re:search(inp, false, true)

        if res then
          -- A single result is unwrapped to a plain string
          if #res == 1 then
            return res[1],'string'
          end

          return res,'string_list'
        end

        return nil
      end,
      ['description'] = 'Regexp matching',
      ['args_schema'] = {ts.string}
    },
    -- Drops input value and return values from function's arguments or an empty string
    ['id'] = {
      ['types'] = {
        ['string'] = true,
        ['list'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(_, _, args)
        if args[1] and args[2] then
          return fun.map(tostring, args),'string_list'
        elseif args[1] then
          return args[1],'string'
        end

        return '','string'
      end,
      ['description'] = 'Drops input value and return values from function\'s arguments or an empty string',
      ['args_schema'] = (ts.string + ts.array_of(ts.string)):is_optional()
    },
    -- Boolean function in, returns either nil or its input if input is in args list
    ['in'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, t, args)
        for _,a in ipairs(args) do
          if a == inp then return inp,t end
        end
        return nil
      end,
      ['description'] = [[Boolean function in.
Returns either nil or its input if input is in args list]],
      ['args_schema'] = ts.array_of(ts.string)
    },
    -- Boolean function not in, returns either nil or its input if input is not in args list
    ['not_in'] = {
      ['types'] = {
        ['string'] = true,
      },
      ['map_type'] = 'string',
      ['process'] = function(inp, t, args)
        for _,a in ipairs(args) do
          if a == inp then return nil end
        end
        return inp,t
      end,
      ['description'] = [[Boolean function not in.
Returns either nil or its input if input is not in args list]],
      ['args_schema'] = ts.array_of(ts.string)
    },
  }
  -- Evaluates a single parsed selector against a task.
  -- @param task task object passed to the extractor and used for logging
  -- @param sel parsed selector: `sel.selector` is the extractor (with `name`,
  --   `args` and `get_value`), `sel.processor_pipe` the optional list of
  --   processors applied to the extracted value
  -- @return the final value (list results are materialised into plain tables)
  --   or nil when nothing was extracted or the pipeline failed
  local function process_selector(task, sel)
    -- Types that may be returned to the caller without further conversion
    local final_types = {
      ['string'] = true,
      ['text'] = true,
      ['string_list'] = true,
      ['text_list'] = true,
    }

    -- Converts a single userdata or table value to a string
    local function implicit_tostring(t, ud_or_table)
      if t ~= 'table' then
        return tostring(ud_or_table),'string'
      end

      -- Table (very special)
      if ud_or_table.value then
        return ud_or_table.value,'string'
      end
      if ud_or_table.addr then
        return ud_or_table.addr,'string'
      end

      return logger.slog("%s", ud_or_table),'string'
    end

    local extractor = sel.selector
    local input,etype = extractor.get_value(task, extractor.args)

    if not input then
      lua_util.debugm(M, task, 'no value extracted for %s', extractor.name)
      return nil
    end

    lua_util.debugm(M, task, 'extracted %s, type %s',
        extractor.name, etype)

    local pipe = sel.processor_pipe or E

    if etype:match('^userdata') or etype:match('^table') then
      -- Userdata and tables have to be converted before any processor runs
      local meth = pipe[1]
      local elt_type = pure_type(etype)

      if meth and meth.method then
        -- Explicit conversion
        if meth.types[etype] then
          lua_util.debugm(M, task, 'apply method `%s` to %s',
              meth.name, etype)
          input,etype = meth.process(input, etype)
        elseif meth.types[elt_type] then
          lua_util.debugm(M, task, 'map method `%s` to list of %s',
              meth.name, elt_type)
          input = fun.map(function(list_elt)
            -- Parentheses keep just the value and drop the returned type
            return (meth.process(list_elt, elt_type))
          end, input)
          etype = 'string_list'
        end

        -- Remove method from the pipeline
        pipe = fun.drop_n(1, pipe)
      elseif not elt_type then
        -- Implicit conversion of a scalar
        lua_util.debugm(M, task, 'apply implicit conversion %s->string', etype)
        input = implicit_tostring(etype, input)
        etype = 'string'
      else
        -- Implicit conversion of every list element
        lua_util.debugm(M, task, 'apply implicit map %s->string', elt_type)
        input = fun.map(function(list_elt)
          return (implicit_tostring(elt_type, list_elt))
        end, input)
        etype = 'string_list'
      end
    end

    -- Step of the left fold: `acc` is a `{value, type}` pair, `proc` is the
    -- next processor of the pipeline
    local function apply_processor(acc, proc)
      if acc == nil or acc[1] == nil then
        lua_util.debugm(M, task, 'do not apply %s, accumulator is nil', proc.name)
        return nil
      end

      local value, vtype = acc[1], acc[2]

      if proc.types[vtype] then
        -- Exact type match
        lua_util.debugm(M, task, 'apply %s to %s', proc.name, vtype)
        return {proc.process(value, vtype, proc.args)}
      end

      local pt = pure_type(vtype)

      if pt and proc.types['list'] then
        -- Generic list processor
        lua_util.debugm(M, task, 'apply list function `%s` to %s', proc.name, vtype)
        return {proc.process(value, vtype, proc.args)}
      end

      if pt and proc.map_type and proc.types[pt] then
        -- Scalar processor applied to each element of the list
        local map_type = proc.map_type .. '_list'
        lua_util.debugm(M, task, 'map `%s` to list of %s resulting %s',
            proc.name, pt, map_type)

        return {fun.map(function(list_elt)
          if not list_elt then return nil end
          return (proc.process(list_elt, pt, proc.args))
        end, value), map_type}
      end

      logger.errx(task, 'cannot apply transform %s for type %s', proc.name, vtype)
      return nil
    end

    -- Now we fold elements using left fold
    local res = fun.foldl(apply_processor, {input, etype}, pipe)

    if not res or not res[1] then return nil end -- Pipeline failed

    if not final_types[res[2]] then
      -- Search for implicit conversion
      local pt = pure_type(res[2])

      if pt then
        lua_util.debugm(M, task, 'apply implicit map %s->string_list', pt)
        res[1] = fun.map(function(e) return implicit_tostring(pt, e) end, res[1])
        res[2] = 'string_list'
      else
        res[1] = implicit_tostring(res[2], res[1])
        res[2] = 'string'
      end
    end

    if pure_type(res[2]) then
      -- Convert to table as it might have a functional form
      res[1] = fun.totable(res[1])
    end

    lua_util.debugm(M, task, 'final selector type: %s, value: %s', res[2], res[1])

    return res[1]
  end
  -- Builds the LPeg grammar used to parse selector strings.
  -- A match produces one capture table per selector expression; every such table
  -- lists the extractor followed by its method/processors, each of them being
  -- `{name, {args...}}`. Method names are captured with a `__` prefix so that
  -- they can be told apart from processors.
  local function make_grammar()
    local l = require "lpeg"
    local P, S, R, C, Ct, V = l.P, l.S, l.R, l.C, l.Ct, l.V

    -- Lexical elements
    local spc = S(" \t\n")^0
    local atom = C((R("az") + R("AZ") + R("09") + S("_-"))^1)
    local singlequoted_string = P "'" * C(((1 - S "'\r\n\f\\") + (P'\\' * 1))^0) * "'"
    local doublequoted_string = P '"' * C(((1 - S'"\r\n\f\\') + (P'\\' * 1))^0) * '"'
    local argument = atom + singlequoted_string + doublequoted_string

    -- Punctuation
    local dot = P(".")
    local colon = P(":")
    local obrace = "(" * spc
    local ebrace = spc * ")"
    local comma = spc * "," * spc
    local sel_separator = spc * S";*" * spc

    -- Parenthesised argument list shared by functions, methods and processors
    local call_args = (obrace * V("ARG_LIST") * ebrace)^0

    local function mark_method(name)
      return '__' .. name
    end

    return P{
      "LIST";
      LIST = Ct(V("EXPR")) * (sel_separator * Ct(V("EXPR")))^0,
      EXPR = V("FUNCTION") * (colon * V("METHOD"))^-1 * (dot * V("PROCESSOR"))^0,
      PROCESSOR = Ct(atom * spc * call_args),
      FUNCTION = Ct(atom * spc * call_args),
      METHOD = Ct((atom / mark_method) * spc * call_args),
      ARG_LIST = Ct((argument * comma^0)^0)
    }
  end
  local parser = make_grammar()
  --[[[
  -- @function lua_selectors.parse_selector(cfg, str)
  -- Parses a selector string and resolves every extractor, method and processor
  -- it mentions, validating their arguments against the declared schemas.
  -- @param cfg config object, used as the logging context
  -- @param str selector string, e.g. `header(User).lower.substring(1, 2):ip`
  -- @return list of `{selector = ..., processor_pipe = {...}}` tables (one per
  --   selector expression) or nil if the string cannot be parsed, a name is
  --   unknown or some arguments are invalid
  --]]
  exports.parse_selector = function(cfg, str)
    local parsed = {parser:match(str)}
    local output = {}

    -- A failed match leaves the table empty
    if not parsed[1] then return nil end

    -- Validates `args` against `schema` and stores the transformed values back
    -- into `args`. The schema is either a single tableshape object covering the
    -- whole argument list or a plain list with one checker per argument.
    local function check_args(name, schema, args)
      if schema then
        if getmetatable(schema) then
          -- Schema covers all arguments
          local res,err = schema:transform(args)
          if not res then
            logger.errx(cfg, 'invalid arguments for %s: %s', name, err)
            return false
          else
            for i,elt in ipairs(res) do
              args[i] = elt
            end
          end
        else
          for i,selt in ipairs(schema) do
            local res,err = selt:transform(args[i])

            if err then
              logger.errx(cfg, 'invalid arguments for %s: %s', name, err)
              return false
            else
              args[i] = res
            end
          end
        end
      end

      return true
    end

    -- Output AST format is the following:
    -- table of individual selectors
    -- each selector: list of functions
    -- each function: function name + optional list of arguments
    for _,sel in ipairs(parsed) do
      local res = {
        selector = {},
        processor_pipe = {},
      }

      local selector_tbl = sel[1]
      if not selector_tbl then
        logger.errx(cfg, 'no selector represented')
        return nil
      end
      if not extractors[selector_tbl[1]] then
        logger.errx(cfg, 'selector %s is unknown', selector_tbl[1])
        return nil
      end

      res.selector = lua_util.shallowcopy(extractors[selector_tbl[1]])
      res.selector.name = selector_tbl[1]
      -- A fresh table is used (not the shared `E`) as `check_args` writes
      -- the transformed values back into the arguments
      res.selector.args = selector_tbl[2] or {}

      if not check_args(res.selector.name,
          res.selector.args_schema,
          res.selector.args) then
        return nil
      end

      lua_util.debugm(M, cfg, 'processed selector %s, args: %s',
          res.selector.name, res.selector.args)

      local pipeline_error = false
      -- Now process processors pipe
      fun.each(function(proc_tbl)
        local proc_name = proc_tbl[1]

        if proc_name:match('^__') then
          -- Special case - method
          local method_name = proc_name:match('^__(.*)$')
          local processor = {
            name = method_name,
            method = true,
            args = proc_tbl[2] or E,
            types = {
              userdata = true,
              table = true,
            },
            map_type = 'string',
            process = function(inp, t, args)
              if t == 'userdata' then
                return inp[method_name](inp, args),'string'
              else
                -- Table
                return inp[method_name],'string'
              end
            end,
          }
          lua_util.debugm(M, cfg, 'attached method %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        else
          if not transform_function[proc_name] then
            logger.errx(cfg, 'processor %s is unknown', proc_name)
            pipeline_error = true
            return nil
          end

          local processor = lua_util.shallowcopy(transform_function[proc_name])
          processor.name = proc_name
          -- Fresh table for the same reason as for the selector arguments
          processor.args = proc_tbl[2] or {}

          if not check_args(processor.name, processor.args_schema, processor.args) then
            pipeline_error = true
            return nil
          end

          lua_util.debugm(M, cfg, 'attached processor %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        end
      end, fun.tail(sel))

      if pipeline_error then
        logger.errx(cfg, 'unknown or invalid processor used, exiting')
        return nil
      end

      table.insert(output, res)
    end

    return output
  end
  --[[[
  -- @function lua_selectors.register_extractor(cfg, name, selector)
  -- Adds (or replaces, with a warning) an extractor under the given name.
  -- The definition must provide a `get_value` field.
  -- @param cfg logging context
  -- @param name extractor name as used in selector strings
  -- @param selector extractor definition table
  -- @return true if the extractor has been registered, false otherwise
  --]]
  function exports.register_extractor(cfg, name, selector)
    if not selector.get_value then
      logger.errx(cfg, 'bad selector %s', name)
      return false
    end

    if extractors[name] then
      logger.warnx(cfg, 'redefining selector %s', name)
    end
    extractors[name] = selector

    return true
  end
  --[[[
  -- @function lua_selectors.register_transform(cfg, name, transform)
  -- Adds (or replaces, with a warning) a transform function under the given name.
  -- The definition must provide both `process` and `types` fields.
  -- @param cfg logging context
  -- @param name transform name as used in selector strings
  -- @param transform transform definition table
  -- @return true if the transform has been registered, false otherwise
  --]]
  function exports.register_transform(cfg, name, transform)
    if not (transform.process and transform.types) then
      logger.errx(cfg, 'bad transform function %s', name)
      return false
    end

    if transform_function[name] then
      logger.warnx(cfg, 'redefining transform function %s', name)
    end
    transform_function[name] = transform

    return true
  end
  --[[[
  -- @function lua_selectors.process_selectors(task, selectors_pipe)
  -- Evaluates every parsed selector of the list against the task.
  -- @param task task object
  -- @param selectors_pipe list of parsed selectors
  -- @return list of values, one per selector, or nil as soon as any selector
  --   yields no value
  --]]
  function exports.process_selectors(task, selectors_pipe)
    local values = {}

    for _,parsed_sel in ipairs(selectors_pipe) do
      local value = process_selector(task, parsed_sel)

      if not value then
        -- If any element is nil, then the whole selector is nil
        return nil
      end

      values[#values + 1] = value
    end

    return values
  end
  --[[[
  -- @function lua_selectors.combine_selectors(task, selectors, delimiter)
  -- Joins selector results using the delimiter (an empty string by default).
  -- @param _ unused
  -- @param selectors list of selector results
  -- @param delimiter optional separator
  -- @return a single string when all the results are strings; otherwise a list
  --   of strings with one entry per combination of zipped elements; nil if
  --   `selectors` is missing
  --]]
  function exports.combine_selectors(_, selectors, delimiter)
    delimiter = delimiter or ''

    if not selectors then return nil end

    local function is_string(s)
      return type(s) == 'string'
    end

    if fun.all(is_string, selectors) then
      return table.concat(selectors, delimiter)
    end

    -- We need to do a spill on each table selector
    -- e.g. s:tbl:s -> s:telt1:s + s:telt2:s ...
    local columns = {}
    local combined = {}

    for idx,value in ipairs(selectors) do
      local kind = type(value)

      if kind == 'string' then
        -- Scalars are repeated for every row of the zip
        columns[idx] = fun.duplicate(value)
      elseif kind == 'userdata' then
        columns[idx] = fun.duplicate(tostring(value))
      else
        columns[idx] = value
      end
    end

    fun.each(function(...)
      combined[#combined + 1] = table.concat({...}, delimiter)
    end, fun.zip(lua_util.unpack(columns)))

    return combined
  end
  --[[[
  -- @function lua_selectors.create_selector_closure(cfg, selector_str, delimiter='')
  -- Parses the selector string once and returns a function that evaluates it
  -- for a task.
  -- @param cfg logging context used while parsing
  -- @param selector_str selector string
  -- @param delimiter optional separator passed to `combine_selectors`
  -- @return function(task) returning the combined selector value or nil;
  --   nil (instead of a function) if the selector string is invalid
  --]]
  function exports.create_selector_closure(cfg, selector_str, delimiter)
    local parsed = exports.parse_selector(cfg, selector_str)

    if not parsed then return nil end

    return function(task)
      local values = exports.process_selectors(task, parsed)

      if not values then return nil end

      return exports.combine_selectors(nil, values, delimiter)
    end
  end
  -- Returns a copy of a registry in which every definition keeps only its
  -- non-function fields
  local function display_selectors(tbl)
    local function is_plain_field(_, field)
      return type(field) ~= 'function'
    end

    local function describe(name, definition)
      return name, fun.tomap(fun.filter(is_plain_field, definition))
    end

    return fun.tomap(fun.map(describe, tbl))
  end
  -- Lists the registered extractors without their function fields
  function exports.list_extractors()
    local listing = display_selectors(extractors)
    return listing
  end
  -- Lists the registered transform functions without their function fields
  function exports.list_transforms()
    local listing = display_selectors(transform_function)
    return listing
  end
  854. return exports