You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668
  1. --[[
  2. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. -- This module contains 'selectors' implementation: code to extract data
  14. -- from Rspamd tasks and compose those together
  15. --
  16. -- Read more at https://rspamd.com/doc/configuration/selectors.html
  17. --[[[
  18. -- @module lua_selectors
  19. -- This module contains 'selectors' implementation: code to extract data
  20. -- from Rspamd tasks and compose those together.
  21. -- Typical selector looks like this: header(User).lower.substring(1, 2):ip
  22. --]]
  23. local exports = {
  24. maps = require "lua_selectors/maps"
  25. }
  26. local logger = require 'rspamd_logger'
  27. local fun = require 'fun'
  28. local lua_util = require "lua_util"
  29. local M = "selectors"
  30. local rspamd_text = require "rspamd_text"
  31. local unpack_function = table.unpack or unpack
  32. local E = {}
  33. local extractors = require "lua_selectors/extractors"
  34. local transform_function = require "lua_selectors/transforms"
  35. local text_cookie = rspamd_text.cookie
  -- Strips the `_list` suffix from a selector type name.
  -- Returns the element type for list types (`string_list` -> `string`)
  -- and nil when the name does not end with `_list`.
  local function pure_type(ltype)
    local elt_type = ltype:match('^(.*)_list$')
    return elt_type
  end
  -- Converts an arbitrary selector value to its string form.
  --   t            - type name of the value as tracked by the selectors pipeline
  --   ud_or_table  - the value itself
  -- Returns `value, 'string'` on success or a single nil when `t` is 'nil'.
  local function implicit_tostring(t, ud_or_table)
    if t == 'table' then
      -- Tables are special: prefer the `value` field, then `addr`,
      -- and only then fall back to the logger's table serialisation
      local field = ud_or_table.value or ud_or_table.addr
      if field then
        return field, 'string'
      end

      return logger.slog("%s", ud_or_table), 'string'
    end

    if t == 'nil' then
      return nil
    end

    local stringish = (t == 'string' or t == 'text')
    if stringish and type(ud_or_table) == 'userdata'
        and ud_or_table.cookie and ud_or_table.cookie == text_cookie then
      -- Userdata carrying the rspamd_text cookie is passed through untouched
      return ud_or_table, 'string'
    end

    return tostring(ud_or_table), 'string'
  end
  -- Evaluates one parsed selector against a task.
  --   task - object passed to the extractor and used as the logging target
  --   sel  - parsed selector: `sel.selector` (extractor with `get_value`,
  --          `args`, `name`) and optional `sel.processor_pipe`
  -- Returns the final value (a table for `*_list` results) or nil when the
  -- extractor yields nothing or the pipeline fails.
  local function process_selector(task, sel)
    -- Predicate for `fun.filter`: keeps truthy elements only
    local function not_empty(elt)
      return elt
    end

    local extractor = sel.selector
    local input, etype = extractor.get_value(task, extractor.args)

    if not input then
      lua_util.debugm(M, task, 'no value extracted for %s', extractor.name)
      return nil
    end

    lua_util.debugm(M, task, 'extracted %s, type %s',
        extractor.name, etype)

    local pipe = sel.processor_pipe or E
    local head = pipe[1]
    -- The first pipeline element consumes the raw value itself when it is a
    -- method or when it declares support for userdata/table input
    local head_is_converter = head and (head.method or
        fun.any(function(t)
          return t == 'userdata' or t == 'table'
        end, head.types))

    if head_is_converter then
      -- Explicit conversion
      if head.types[etype] then
        lua_util.debugm(M, task, 'apply method `%s` to %s',
            head.name, etype)
        input, etype = head.process(input, etype, head.args)
      else
        local pt = pure_type(etype)

        if head.types[pt] then
          lua_util.debugm(M, task, 'map method `%s` to list of %s',
              head.name, pt)
          -- Apply the method to every list element; the resulting type is
          -- taken from the last element that produced a value
          local function convert(list_elt)
            local ret, ty = head.process(list_elt, pt, head.args)
            if ret then
              etype = ty
            end
            return ret
          end
          -- Materialise right away (dropping empty results) so that the
          -- element type is known before the list type is composed
          input = fun.totable(fun.filter(not_empty, fun.map(convert, input)))

          if input and etype then
            etype = etype .. "_list"
          else
            input = nil
          end
        end
      end
      -- Remove method from the pipeline
      pipe = fun.drop_n(1, pipe)
    elseif etype:match('^userdata') or etype:match('^table') then
      -- Implicit conversion
      local pt = pure_type(etype)

      if pt then
        lua_util.debugm(M, task, 'apply implicit map %s->string', pt)
        input = fun.filter(not_empty,
            fun.map(function(list_elt)
              -- Parentheses keep only the converted value
              return (implicit_tostring(pt, list_elt))
            end, input))
        etype = 'string_list'
      else
        lua_util.debugm(M, task, 'apply implicit conversion %s->string', etype)
        input = implicit_tostring(etype, input)
        etype = 'string'
      end
    else
      lua_util.debugm(M, task, 'avoid implicit conversion as the transformer accepts complex input')
    end

    -- Left-fold step: `acc` is `{ value, type }`, `x` is the next transform
    local function apply_transform(acc, x)
      if acc == nil or acc[1] == nil then
        lua_util.debugm(M, task, 'do not apply %s, accumulator is nil', x.name)
        return nil
      end

      local value, t = acc[1], acc[2]

      if x.types[t] then
        -- Transform accepts this type directly
        lua_util.debugm(M, task, 'apply %s to %s', x.name, t)
        return { x.process(value, t, x.args) }
      end

      local pt = pure_type(t)

      if pt then
        if x.types['list'] then
          -- Generic list processor
          lua_util.debugm(M, task, 'apply list function `%s` to %s', x.name, t)
          return { x.process(value, t, x.args) }
        end

        if x.map_type and x.types[pt] then
          local map_type = x.map_type .. '_list'
          lua_util.debugm(M, task, 'map `%s` to list of %s resulting %s',
              x.name, pt, map_type)
          -- Apply map, filtering empty values
          local mapped = fun.filter(not_empty,
              fun.map(function(list_elt)
                if not list_elt then
                  return nil
                end
                -- Only the first result of the transform is kept
                return (x.process(list_elt, pt, x.args))
              end, value))

          return { mapped, map_type }
        end
      end

      logger.errx(task, 'cannot apply transform %s for type %s', x.name, t)
      return nil
    end

    local res = fun.foldl(apply_transform,
        { input, etype },
        pipe)

    if not res or not res[1] then
      -- Pipeline failed
      return nil
    end

    local value, vtype = res[1], res[2]

    if vtype ~= 'string' and vtype ~= 'string_list' then
      -- Search for implicit conversion
      local pt = pure_type(vtype)

      if pt then
        lua_util.debugm(M, task, 'apply implicit map %s->string_list', pt)
        value = fun.map(function(e)
          return implicit_tostring(pt, e)
        end, value)
        vtype = 'string_list'
      else
        value = implicit_tostring(vtype, value)
        vtype = 'string'
      end
    end

    if pure_type(vtype) then
      -- Convert to table as it might have a functional form
      value = fun.totable(value)
    end

    lua_util.debugm(M, task, 'final selector type: %s, value: %s', vtype, value)

    return value
  end
  -- Builds the LPeg grammar for selector expressions.
  -- The start rule is LIST: one or more expressions separated by `;` or `*`;
  -- each expression is captured as a table of `{ name, args }` tables, and
  -- a method name (the part after `:`) is prefixed with `__`.
  local function make_grammar()
    local lpeg = require "lpeg"
    local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
    local C, Cf, Cg, Ct = lpeg.C, lpeg.Cf, lpeg.Cg, lpeg.Ct

    local spc = S(" \t\n") ^ 0
    local cont = R("\128\191") -- continuation byte
    local utf8_high = R("\194\223") * cont
        + R("\224\239") * cont * cont
        + R("\240\244") * cont * cont * cont
    local alnum = R("az") + R("AZ") + R("09")
    local atom_start = (alnum + utf8_high + S("-")) ^ 1
    local atom_end = (alnum + S("-_") + utf8_high) ^ 1
    local atom_mid = (1 - S("'\r\n\f\\,)(}{= \"")) ^ 1
    -- Arguments may contain more characters than selector names
    local atom_argument = C(atom_start * atom_mid ^ 0 * atom_end ^ 0)
    -- Selector names themselves are matched more strictly
    local atom = C(atom_start * atom_end ^ 0)
    local singlequoted_string = P("'") * C(((1 - S("'\r\n\f\\")) + (P('\\') * 1)) ^ 0) * "'"
    local doublequoted_string = P('"') * C(((1 - S('"\r\n\f\\')) + (P('\\') * 1)) ^ 0) * '"'
    local argument = atom_argument + singlequoted_string + doublequoted_string
    local dot = P(".")
    local colon = P(":")
    local obrace = "(" * spc
    local tbl_obrace = "{" * spc
    local eqsign = spc * "=" * spc
    local tbl_ebrace = spc * "}"
    local ebrace = spc * ")"
    local comma = spc * "," * spc
    local sel_separator = spc * S(";*") * spc

    -- Parenthesised argument list, shared by functions, methods and processors
    local call_args = (obrace * V("ARG_LIST") * ebrace) ^ 0

    -- Marks a method name so that it can be told apart from processors later
    local function mark_method(e)
      return '__' .. e
    end

    return P {
      "LIST";
      LIST = Ct(V("EXPR")) * (sel_separator * Ct(V("EXPR"))) ^ 0,
      EXPR = V("FUNCTION") * (colon * V("METHOD")) ^ -1 * (dot * V("PROCESSOR")) ^ 0,
      PROCESSOR = Ct(atom * spc * call_args),
      FUNCTION = Ct(atom * spc * call_args),
      METHOD = Ct(atom / mark_method * spc * call_args),
      ARG_LIST = Ct((V("ARG") * comma ^ 0) ^ 0),
      ARG = Cf(tbl_obrace * V("NAMED_ARG") * tbl_ebrace, rawset) + argument + V("LIST_ARGS"),
      NAMED_ARG = (Ct("") * Cg(argument * eqsign * (argument + V("LIST_ARGS")) * comma ^ 0) ^ 0),
      LIST_ARGS = Ct(tbl_obrace * V("LIST_ARG") * tbl_ebrace),
      LIST_ARG = Cg(argument * comma ^ 0) ^ 0,
    }
  end

  -- Grammar is compiled once when the module is loaded
  local parser = make_grammar()
  --[[[
  -- @function lua_selectors.parse_selector(cfg, str)
  -- Parses a selector string and resolves it into a list of selector
  -- descriptions ready to be passed to `process_selectors`.
  -- @param cfg configuration object, used as the logging target
  -- @param {string} str selector expression
  -- @return {table|nil} array of `{ selector = ..., processor_pipe = ... }`
  -- tables, or nil when the string does not parse, an extractor or processor
  -- is unknown, or arguments fail their schema check
  --]]
  exports.parse_selector = function(cfg, str)
    local parsed = { parser:match(str) }
    local output = {}

    if not parsed or not parsed[1] then
      return nil
    end

    -- Validates `args` against `schema` (when one is defined) and writes the
    -- transformed values back into `args` in place.
    -- Returns false (after logging) when validation fails, true otherwise.
    local function check_args(name, schema, args)
      if schema then
        if getmetatable(schema) then
          -- Schema covers all arguments
          local res, err = schema:transform(args)
          if not res then
            logger.errx(rspamd_config, 'invalid arguments for %s: %s', name, err)
            return false
          else
            for i, elt in ipairs(res) do
              args[i] = elt
            end
          end
        else
          -- One schema element per positional argument
          for i, selt in ipairs(schema) do
            local res, err = selt:transform(args[i])

            if err then
              logger.errx(rspamd_config, 'invalid arguments for %s: argument number: %s, error: %s', name, i, err)
              return false
            else
              args[i] = res
            end
          end
        end
      end

      return true
    end

    -- Output AST format is the following:
    -- table of individual selectors
    -- each selector: list of functions
    -- each function: function name + optional list of arguments
    for _, sel in ipairs(parsed) do
      local res = {
        selector = {},
        processor_pipe = {},
      }

      local selector_tbl = sel[1]
      if not selector_tbl then
        logger.errx(cfg, 'no selector represented')
        return nil
      end
      if not extractors[selector_tbl[1]] then
        logger.errx(cfg, 'selector %s is unknown', selector_tbl[1])
        return nil
      end

      res.selector = lua_util.shallowcopy(extractors[selector_tbl[1]])
      res.selector.name = selector_tbl[1]
      -- A fresh table is used instead of the shared empty table `E`:
      -- `check_args` stores transformed values into the arguments table,
      -- so the shared constant must never be handed to it
      res.selector.args = selector_tbl[2] or {}

      if not check_args(res.selector.name,
          res.selector.args_schema,
          res.selector.args) then
        return nil
      end

      lua_util.debugm(M, cfg, 'processed selector %s, args: %s',
          res.selector.name, res.selector.args)

      local pipeline_error = false
      -- Now process processors pipe
      fun.each(function(proc_tbl)
        local proc_name = proc_tbl[1]

        if proc_name:match('^__') then
          -- Special case - method
          local method_name = proc_name:match('^__(.*)$')
          -- Check array indexing...
          if tonumber(method_name) then
            method_name = tonumber(method_name)
          end
          local processor = {
            name = tostring(method_name),
            method = true,
            -- Method arguments are never passed to `check_args`, so the
            -- shared empty table is fine here
            args = proc_tbl[2] or E,
            types = {
              userdata = true,
              table = true,
              string = true,
            },
            map_type = 'string',
            process = function(inp, t, args)
              local ret
              if t == 'table' then
                -- Plain table field
                ret = inp[method_name]
              else
                -- We call method unpacking arguments and dropping all but the first result returned
                ret = (inp[method_name](inp, unpack_function(args or E)))
              end

              local ret_type = type(ret)

              if ret_type == 'nil' then
                return nil
              end
              -- Now apply types heuristic
              if ret_type == 'string' then
                return ret, 'string'
              elseif ret_type == 'table' then
                -- TODO: we need to ensure that 1) table is numeric 2) table has merely strings
                return ret, 'string_list'
              else
                return implicit_tostring(ret_type, ret)
              end
            end,
          }
          lua_util.debugm(M, cfg, 'attached method %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        else
          if not transform_function[proc_name] then
            logger.errx(cfg, 'processor %s is unknown', proc_name)
            pipeline_error = proc_name
            return nil
          end
          local processor = lua_util.shallowcopy(transform_function[proc_name])
          processor.name = proc_name
          -- Fresh table for the same reason as the selector arguments above
          processor.args = proc_tbl[2] or {}

          if not check_args(processor.name, processor.args_schema, processor.args) then
            pipeline_error = 'args schema for ' .. proc_name
            return nil
          end

          lua_util.debugm(M, cfg, 'attached processor %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        end
      end, fun.tail(sel))

      if pipeline_error then
        logger.errx(cfg, 'unknown or invalid processor used: "%s", exiting', pipeline_error)
        return nil
      end

      table.insert(output, res)
    end

    return output
  end
  --[[[
  -- @function lua_selectors.register_extractor(cfg, name, selector)
  -- Registers (or redefines, with a warning) an extractor under `name`.
  -- The extractor must provide a `get_value` field.
  -- @return {boolean} true when registered, false when `selector` is rejected
  --]]
  exports.register_extractor = function(cfg, name, selector)
    if not selector.get_value then
      logger.errx(cfg, 'bad selector %s', name)
      return false
    end

    if extractors[name] then
      logger.warnx(cfg, 'redefining selector %s', name)
    end

    extractors[name] = selector
    return true
  end
  --[[[
  -- @function lua_selectors.register_transform(cfg, name, transform)
  -- Registers (or redefines, with a warning) a transform under `name`.
  -- The transform must provide both `process` and `types` fields.
  -- @return {boolean} true when registered, false when `transform` is rejected
  --]]
  exports.register_transform = function(cfg, name, transform)
    if not (transform.process and transform.types) then
      logger.errx(cfg, 'bad transform function %s', name)
      return false
    end

    if transform_function[name] then
      logger.warnx(cfg, 'redefining transform function %s', name)
    end

    transform_function[name] = transform
    return true
  end
  --[[[
  -- @function lua_selectors.process_selectors(task, selectors_pipe)
  -- Evaluates every parsed selector in `selectors_pipe` against `task`.
  -- @return {table|nil} array of per-selector results, or nil as soon as
  -- any selector yields no value
  --]]
  exports.process_selectors = function(task, selectors_pipe)
    local results = {}

    for _, sel in ipairs(selectors_pipe) do
      local value = process_selector(task, sel)

      -- If any element is nil, then the whole selector is nil
      if not value then
        return nil
      end

      results[#results + 1] = value
    end

    return results
  end
  --[[[
  -- @function lua_selectors.combine_selectors(task, selectors, delimiter)
  -- Joins selector results using `delimiter` (empty string by default).
  -- Without table results a single value is returned: a string built by
  -- `table.concat`, or the result of `rspamd_text.fromtable` when userdata
  -- is present. When some result is a table, a table of joined rows is
  -- returned instead.
  -- @return joined value, table of joined values, or nil if `selectors` is nil
  --]]
  exports.combine_selectors = function(_, selectors, delimiter)
    delimiter = delimiter or ''

    if not selectors then
      return nil
    end

    local have_tables, have_userdata

    for _, s in ipairs(selectors) do
      local kind = type(s)
      if kind == 'table' then
        have_tables = true
      elseif kind == 'userdata' then
        have_userdata = true
      end
    end

    if have_tables then
      -- Scalars are repeated via `fun.duplicate`, tables are walked, and
      -- everything is zipped together into rows:
      -- e.g. s:tbl:s -> s:telt1:s + s:telt2:s ...
      local columns = {}
      local rows = {}

      for i, s in ipairs(selectors) do
        local kind = type(s)
        if kind == 'string' then
          columns[i] = fun.duplicate(s)
        elseif kind == 'userdata' then
          columns[i] = fun.duplicate(tostring(s))
        else
          -- Raw table
          columns[i] = fun.map(tostring, s)
        end
      end

      fun.each(function(...)
        rows[#rows + 1] = table.concat({ ... }, delimiter)
      end, fun.zip(lua_util.unpack(columns)))

      return rows
    end

    if have_userdata then
      return rspamd_text.fromtable(selectors, delimiter)
    end

    return table.concat(selectors, delimiter)
  end
  --[[[
  -- @function lua_selectors.flatten_selectors(log_obj, selectors, delimiter)
  -- Convert selectors to a flat table of elements: strings are kept,
  -- userdata is stringified and nested tables are expanded recursively.
  -- The first and the third arguments are ignored.
  --]]
  exports.flatten_selectors = function(_, selectors, _)
    local flat = {}

    local function collect(tbl)
      for _, s in ipairs(tbl) do
        local kind = type(s)

        if kind == 'string' then
          flat[#flat + 1] = s
        elseif kind == 'userdata' then
          flat[#flat + 1] = tostring(s)
        else
          collect(s)
        end
      end
    end

    collect(selectors)

    return flat
  end
  --[[[
  -- @function lua_selectors.kv_table_from_pairs(log_obj, selectors)
  -- Convert selectors to a table where the odd elements are keys and the
  -- even elements are the corresponding values, similarly to building a map
  -- from a list of (k, v) pairs.
  -- To specify the concrete constant keys, one can use the `id` extractor.
  -- An empty table is returned (and an error is logged to `log_obj`) when
  -- the number of elements is zero or odd.
  --]]
  exports.kv_table_from_pairs = function(log_obj, selectors, _)
    local rspamd_logger = require "rspamd_logger"
    local res = {}
    local count = #selectors

    if count == 0 or count % 2 ~= 0 then
      rspamd_logger.errx(log_obj, "invalid invocation of the `kv_table_from_pairs`: table length is invalid %s",
          count)
      return res
    end

    for i = 1, count, 2 do
      local key = tostring(selectors[i])
      local val = selectors[i + 1]
      local kind = type(val)

      if kind == 'string' then
        res[key] = val
      elseif kind == 'userdata' then
        res[key] = tostring(val)
      else
        -- Anything else is treated as a list and stringified element-wise
        res[key] = fun.totable(fun.map(function(elt)
          return tostring(elt)
        end, val))
      end
    end

    return res
  end
  --[[[
  -- @function lua_selectors.create_selector_closure_fn(log_obj, cfg, selector_str, delimiter, fn)
  -- Creates a closure from a string selector, using the specific combinator
  -- function `fn`, which is called as `fn(log_obj, results, delimiter)`.
  -- @return {function|nil} closure taking a task, or nil when the selector
  -- string cannot be parsed
  --]]
  exports.create_selector_closure_fn = function(log_obj, cfg, selector_str, delimiter, fn)
    local parsed = exports.parse_selector(cfg, selector_str)

    if parsed then
      return function(task)
        local extracted = exports.process_selectors(task, parsed)

        if not extracted then
          return nil
        end

        return fn(log_obj, extracted, delimiter)
      end
    end

    return nil
  end
  --[[[
  -- @function lua_selectors.create_selector_closure(cfg, selector_str, delimiter='', flatten=false)
  -- Creates a closure from a string selector. Results are combined with
  -- `flatten_selectors` when `flatten` is set and with `combine_selectors`
  -- otherwise.
  --]]
  exports.create_selector_closure = function(cfg, selector_str, delimiter, flatten)
    local combinator_fn

    if flatten then
      combinator_fn = exports.flatten_selectors
    end
    if not combinator_fn then
      combinator_fn = exports.combine_selectors
    end

    return exports.create_selector_closure_fn(nil, cfg, selector_str, delimiter, combinator_fn)
  end
  -- Produces a displayable copy of a registry table (name -> definition):
  -- every definition is rebuilt without its function-typed fields.
  local function display_selectors(tbl)
    local function not_function(_, vv)
      return type(vv) ~= 'function'
    end

    local function strip_functions(k, v)
      return k, fun.tomap(fun.filter(not_function, v))
    end

    return fun.tomap(fun.map(strip_functions, tbl))
  end
  -- Returns the registered extractors with function fields stripped
  exports.list_extractors = function()
    local listing = display_selectors(extractors)
    return listing
  end
  -- Returns the registered transforms with function fields stripped
  exports.list_transforms = function()
    local listing = display_selectors(transform_function)
    return listing
  end
  -- Registers `map` under `name` in `exports.maps`; an already registered
  -- name is left untouched and an error is logged instead.
  exports.add_map = function(name, map)
    if exports.maps[name] then
      logger.errx(rspamd_config, "duplicate map redefinition for the selectors: %s", name)
      return
    end

    exports.maps[name] = map
  end

  -- Publish log target
  exports.M = M

  return exports