You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

init.lua 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. --[[
  2. Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. -- This module contains 'selectors' implementation: code to extract data
  14. -- from Rspamd tasks and compose those together
  15. --
  16. -- Read more at https://rspamd.com/doc/configuration/selectors.html
  17. --[[[
  18. -- @module lua_selectors
  19. -- This module contains 'selectors' implementation: code to extract data
  20. -- from Rspamd tasks and compose those together.
  21. -- Typical selector looks like this: header(User).lower.substring(1, 2):ip
  22. --]]
  23. local exports = {
  24. maps = require "lua_selectors/maps"
  25. }
  26. local logger = require 'rspamd_logger'
  27. local fun = require 'fun'
  28. local lua_util = require "lua_util"
  29. local M = "selectors"
  30. local rspamd_text = require "rspamd_text"
  31. local unpack_function = table.unpack or unpack
  32. local E = {}
  33. local extractors = require "lua_selectors/extractors"
  34. local transform_function = require "lua_selectors/transforms"
  35. local text_cookie = rspamd_text.cookie
  -- Strips the `_list` suffix from a selector type name.
  -- Returns the element type (e.g. 'string' for 'string_list') or nil when
  -- `ltype` does not name a list type.
  local function pure_type(ltype)
    local element_type = ltype:match('^(.*)_list$')

    return element_type
  end
  -- Implicitly converts a value of the declared type `t` to a string-like value.
  --
  -- t: type name of the value ('table', 'userdata', 'nil', ...)
  -- ud_or_table: the value itself
  --
  -- Returns `value, 'string'` on success or nil when `t` is 'nil'.
  --  * tables: the `value` field, then the `addr` field, otherwise the table
  --    formatted via logger.slog
  --  * userdata carrying the rspamd_text cookie: returned untouched
  --  * everything else: tostring() of the value
  local function implicit_tostring(t, ud_or_table)
    if t == 'table' then
      -- Table (very special)
      if ud_or_table.value then
        return ud_or_table.value, 'string'
      elseif ud_or_table.addr then
        return ud_or_table.addr, 'string'
      end

      return logger.slog("%s", ud_or_table), 'string'
    elseif t == 'userdata' then
      -- The cookie must be read from the value: `t` is just the type name
      -- string, so indexing it could never find a cookie.
      -- pcall keeps the function non-throwing for userdata that cannot be indexed.
      local ok, cookie = pcall(function() return ud_or_table.cookie end)

      if ok and cookie and cookie == text_cookie then
        -- Preserve opaque
        return ud_or_table, 'string'
      end

      return tostring(ud_or_table), 'string'
    elseif t ~= 'nil' then
      return tostring(ud_or_table), 'string'
    end

    return nil
  end
  -- Runs one parsed selector against a task.
  --
  -- task: object handed to the extractor and used for logging
  -- sel: parsed selector with `selector` (extractor description providing
  --      `get_value`, `args`, `name`) and an optional `processor_pipe`
  --
  -- Steps:
  --  1. extract the value and its type via sel.selector.get_value
  --  2. apply the leading method (explicit conversion), or convert
  --     userdata/table values to strings implicitly
  --  3. left-fold the remaining processors over {value, type}
  --  4. coerce the result to string / string_list if needed
  --
  -- Returns the resulting value (list results are materialised as tables) or
  -- nil when nothing was extracted or the pipeline failed.
  local function process_selector(task, sel)
    -- Types that may leave the pipeline without an implicit conversion
    local function allowed_type(t)
      if t == 'string' or t == 'text' or t == 'string_list' or t == 'text_list' then
        return true
      end
      return false
    end

    -- Truthy (the element type name) for list types
    local function list_type(t)
      return pure_type(t)
    end

    local input, etype = sel.selector.get_value(task, sel.selector.args)

    if not input then
      lua_util.debugm(M, task, 'no value extracted for %s', sel.selector.name)
      return nil
    end

    lua_util.debugm(M, task, 'extracted %s, type %s',
        sel.selector.name, etype)

    local pipe = sel.processor_pipe or E
    local first_elt = pipe[1]

    if first_elt and first_elt.method then
      -- Explicit conversion
      local meth = first_elt

      if meth.types[etype] then
        lua_util.debugm(M, task, 'apply method `%s` to %s',
            meth.name, etype)
        input, etype = meth.process(input, etype, meth.args)
      else
        local pt = pure_type(etype)

        if meth.types[pt] then
          lua_util.debugm(M, task, 'map method `%s` to list of %s',
              meth.name, pt)
          -- Map method to a list of inputs, excluding empty elements
          input = fun.filter(function(map_elt) return map_elt end,
              fun.map(function(list_elt)
                -- Forward the method arguments, exactly as the scalar branch
                -- does; otherwise `:method(arg)` loses `arg` on lists
                local ret, _ = meth.process(list_elt, pt, meth.args)
                return ret
              end, input))
          etype = 'string_list'
        end
      end
      -- Remove method from the pipeline
      pipe = fun.drop_n(1, pipe)
    elseif etype:match('^userdata') or etype:match('^table') then
      -- Implicit conversion
      local pt = pure_type(etype)

      if not pt then
        lua_util.debugm(M, task, 'apply implicit conversion %s->string', etype)
        input = implicit_tostring(etype, input)
        etype = 'string'
      else
        lua_util.debugm(M, task, 'apply implicit map %s->string', pt)
        input = fun.filter(function(map_elt) return map_elt end,
            fun.map(function(list_elt)
              local ret = implicit_tostring(pt, list_elt)
              return ret
            end, input))
        etype = 'string_list'
      end
    end

    -- Now we fold elements using left fold.
    -- The accumulator is a {value, type} pair; nil aborts the rest of the pipe.
    local function fold_function(acc, x)
      if acc == nil or acc[1] == nil then
        lua_util.debugm(M, task, 'do not apply %s, accumulator is nil', x.name)
        return nil
      end

      local value = acc[1]
      local t = acc[2]

      if not x.types[t] then
        local pt = pure_type(t)

        if pt and x.types['list'] then
          -- Generic list processor
          lua_util.debugm(M, task, 'apply list function `%s` to %s', x.name, t)
          return {x.process(value, t, x.args)}
        elseif pt and x.map_type and x.types[pt] then
          local map_type = x.map_type .. '_list'
          lua_util.debugm(M, task, 'map `%s` to list of %s resulting %s',
              x.name, pt, map_type)
          -- Apply map, filtering empty values
          return {
            fun.filter(function(map_elt) return map_elt end,
                fun.map(function(list_elt)
                  if not list_elt then return nil end
                  local ret, _ = x.process(list_elt, pt, x.args)
                  return ret
                end, value)),
            map_type -- Returned type
          }
        end

        logger.errx(task, 'cannot apply transform %s for type %s', x.name, t)
        return nil
      end

      lua_util.debugm(M, task, 'apply %s to %s', x.name, t)
      return {x.process(value, t, x.args)}
    end

    local res = fun.foldl(fold_function,
        {input, etype},
        pipe)

    if not res or not res[1] then return nil end -- Pipeline failed

    if not allowed_type(res[2]) then
      -- Search for implicit conversion
      local pt = pure_type(res[2])

      if pt then
        lua_util.debugm(M, task, 'apply implicit map %s->string_list', pt)
        res[1] = fun.map(function(e) return implicit_tostring(pt, e) end, res[1])
        res[2] = 'string_list'
      else
        res[1] = implicit_tostring(res[2], res[1])
        res[2] = 'string'
      end
    end

    if list_type(res[2]) then
      -- Convert to table as it might have a functional form
      res[1] = fun.totable(res[1])
    end

    lua_util.debugm(M, task, 'final selector type: %s, value: %s', res[2], res[1])

    return res[1]
  end
  -- Builds the LPeg grammar used to parse selector expressions such as
  -- `header(User).lower.substring(1, 2):ip`.
  --
  -- A successful match yields one capture table per selector. Each of them
  -- holds `{name, args}` tables: the extractor first, then an optional method
  -- (its name prefixed with `__`), then the processors.
  local function make_grammar()
    local lpeg = require "lpeg"
    local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
    local C, Cf, Cg, Ct = lpeg.C, lpeg.Cf, lpeg.Cg, lpeg.Ct

    -- Lexical elements
    local ws = S(" \t\n")^0
    local word = C((R("az") + R("AZ") + R("09") + S("_-"))^1)
    local sq_string = P "'" * C(((1 - S "'\r\n\f\\") + (P'\\' * 1))^0) * "'"
    local dq_string = P '"' * C(((1 - S'"\r\n\f\\') + (P'\\' * 1))^0) * '"'
    local value = word + sq_string + dq_string

    -- Punctuation
    local dot = P(".")
    local colon = P(":")
    local open_paren = "(" * ws
    local close_paren = ws * ")"
    local open_curly = "{" * ws
    local close_curly = ws * "}"
    local equals = ws * "=" * ws
    local comma = ws * "," * ws
    local selector_sep = ws * S";*" * ws

    -- Parenthesised argument list shared by functions, methods and processors
    local call_args = (open_paren * V("ARG_LIST") * close_paren)^0
    -- Methods are tagged with a `__` prefix to tell them apart from processors
    local method_name = word / function(name) return '__' .. name end

    return P{
      "LIST";
      LIST = Ct(V("EXPR")) * (selector_sep * Ct(V("EXPR")))^0,
      EXPR = V("FUNCTION") * (colon * V("METHOD"))^-1 * (dot * V("PROCESSOR"))^0,
      PROCESSOR = Ct(word * ws * call_args),
      FUNCTION = Ct(word * ws * call_args),
      METHOD = Ct(method_name * ws * call_args),
      ARG_LIST = Ct((V("ARG") * comma^0)^0),
      ARG = Cf(open_curly * V("NAMED_ARG") * close_curly, rawset) + value + V("LIST_ARGS"),
      NAMED_ARG = (Ct("") * Cg(value * equals * (value + V("LIST_ARGS")) * comma^0)^0),
      LIST_ARGS = Ct(open_curly * V("LIST_ARG") * close_curly),
      LIST_ARG = Cg(value * comma^0)^0,
    }
  end
  207. local parser = make_grammar()
  --[[[
  -- @function lua_selectors.parse_selector(cfg, str)
  -- Parses a selector expression and resolves every extractor, method and
  -- processor it mentions.
  -- @param cfg configuration object used for logging
  -- @param {string} str selector expression
  -- @return {table} list of `{selector = ..., processor_pipe = {...}}` tables,
  -- or nil when the string cannot be parsed, an extractor or processor is
  -- unknown, or arguments do not pass the schema
  --]]
  exports.parse_selector = function(cfg, str)
    local parsed = {parser:match(str)}
    local output = {}

    if not parsed[1] then return nil end

    -- Log against the configuration we were given, like the rest of this
    -- function; fall back to the global one when the caller passed nothing
    local log_obj = cfg or rspamd_config

    -- Validates `args` against `schema` (if any) and writes the transformed
    -- values back into `args`. Returns false after logging on failure.
    local function check_args(name, schema, args)
      if schema then
        if getmetatable(schema) then
          -- Schema covers all arguments
          local res, err = schema:transform(args)
          if not res then
            logger.errx(log_obj, 'invalid arguments for %s: %s', name, err)
            return false
          else
            for i, elt in ipairs(res) do
              args[i] = elt
            end
          end
        else
          -- One schema element per positional argument
          for i, selt in ipairs(schema) do
            local res, err = selt:transform(args[i])

            if err then
              logger.errx(log_obj, 'invalid arguments for %s: %s', name, err)
              return false
            else
              args[i] = res
            end
          end
        end
      end

      return true
    end

    -- Output AST format is the following:
    -- table of individual selectors
    -- each selector: list of functions
    -- each function: function name + optional list of arguments
    for _, sel in ipairs(parsed) do
      local res = {
        selector = {},
        processor_pipe = {},
      }

      local selector_tbl = sel[1]
      if not selector_tbl then
        logger.errx(cfg, 'no selector represented')
        return nil
      end
      if not extractors[selector_tbl[1]] then
        logger.errx(cfg, 'selector %s is unknown', selector_tbl[1])
        return nil
      end

      res.selector = lua_util.shallowcopy(extractors[selector_tbl[1]])
      res.selector.name = selector_tbl[1]
      -- check_args writes into this table, so it must be a fresh one and not
      -- the shared empty table `E`
      res.selector.args = selector_tbl[2] or {}

      if not check_args(res.selector.name,
          res.selector.args_schema,
          res.selector.args) then
        return nil
      end

      lua_util.debugm(M, cfg, 'processed selector %s, args: %s',
          res.selector.name, res.selector.args)

      local pipeline_error = false
      -- Now process processors pipe
      fun.each(function(proc_tbl)
        local proc_name = proc_tbl[1]

        if proc_name:match('^__') then
          -- Special case - method
          local method_name = proc_name:match('^__(.*)$')
          -- Check array indexing...
          if tonumber(method_name) then
            method_name = tonumber(method_name)
          end
          local processor = {
            name = tostring(method_name),
            method = true,
            -- Only read (unpacked) below, so sharing `E` is safe here
            args = proc_tbl[2] or E,
            types = {
              userdata = true,
              table = true,
              string = true,
            },
            map_type = 'string',
            process = function(inp, t, args)
              local ret
              if t == 'table' then
                -- Plain table field
                ret = inp[method_name]
              else
                -- We call method unpacking arguments and dropping all but the first result returned
                ret = (inp[method_name](inp, unpack_function(args or E)))
              end

              local ret_type = type(ret)

              if ret_type == 'nil' then return nil end
              -- Now apply types heuristic
              if ret_type == 'string' then
                return ret, 'string'
              elseif ret_type == 'table' then
                -- TODO: we need to ensure that 1) table is numeric 2) table has merely strings
                return ret, 'string_list'
              else
                return implicit_tostring(ret_type, ret)
              end
            end,
          }
          lua_util.debugm(M, cfg, 'attached method %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        else
          if not transform_function[proc_name] then
            logger.errx(cfg, 'processor %s is unknown', proc_name)
            pipeline_error = proc_name
            return nil
          end
          local processor = lua_util.shallowcopy(transform_function[proc_name])
          processor.name = proc_name
          -- Fresh table for the same reason as the selector arguments
          processor.args = proc_tbl[2] or {}

          if not check_args(processor.name, processor.args_schema, processor.args) then
            pipeline_error = 'args schema for ' .. proc_name
            return nil
          end

          lua_util.debugm(M, cfg, 'attached processor %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        end
      end, fun.tail(sel))

      if pipeline_error then
        logger.errx(cfg, 'unknown or invalid processor used: "%s", exiting', pipeline_error)
        return nil
      end

      table.insert(output, res)
    end

    return output
  end
  --[[[
  -- @function lua_selectors.register_extractor(cfg, name, selector)
  -- Registers (or replaces) an extractor under the given name.
  -- @param cfg configuration object used for logging
  -- @param {string} name extractor name
  -- @param {table} selector extractor definition; must have `get_value`
  -- @return {boolean} true when registered, false when `get_value` is missing
  --]]
  exports.register_extractor = function(cfg, name, selector)
    -- Reject definitions that cannot extract anything
    if not selector.get_value then
      logger.errx(cfg, 'bad selector %s', name)

      return false
    end

    if extractors[name] then
      logger.warnx(cfg, 'redefining selector %s', name)
    end

    extractors[name] = selector

    return true
  end
  --[[[
  -- @function lua_selectors.register_transform(cfg, name, transform)
  -- Registers (or replaces) a transform function under the given name.
  -- @param cfg configuration object used for logging
  -- @param {string} name transform name
  -- @param {table} transform definition; must have `process` and `types`
  -- @return {boolean} true when registered, false when a required field is missing
  --]]
  exports.register_transform = function(cfg, name, transform)
    -- Both the implementation and the accepted types are mandatory
    if not (transform.process and transform.types) then
      logger.errx(cfg, 'bad transform function %s', name)

      return false
    end

    if transform_function[name] then
      logger.warnx(cfg, 'redefining transform function %s', name)
    end

    transform_function[name] = transform

    return true
  end
  --[[[
  -- @function lua_selectors.process_selectors(task, selectors_pipe)
  -- Evaluates every parsed selector of the pipe against the task.
  -- @param task object passed to each selector
  -- @param {table} selectors_pipe list of parsed selectors
  -- @return {table} list with one result per selector, or nil as soon as any
  -- selector yields nothing
  --]]
  exports.process_selectors = function(task, selectors_pipe)
    local results = {}

    for _, parsed_sel in ipairs(selectors_pipe) do
      local value = process_selector(task, parsed_sel)

      if value then
        results[#results + 1] = value
      else
        -- If any element is nil, then the whole selector is nil
        return nil
      end
    end

    return results
  end
  --[[[
  -- @function lua_selectors.combine_selectors(task, selectors, delimiter)
  -- Joins selector results into a single value.
  -- @param _ unused (kept for interface compatibility)
  -- @param {table} selectors list of results: strings, userdata or lists
  -- @param {string} delimiter separator placed between results (default: '')
  -- @return nil when `selectors` is nil; a string when every result is a
  -- string; the result of `rspamd_text.fromtable` when userdata is present but
  -- no lists; otherwise a table of strings, one per combination
  --]]
  exports.combine_selectors = function(_, selectors, delimiter)
    if not delimiter then delimiter = '' end

    if not selectors then return nil end

    local have_tables, have_userdata

    for _, s in ipairs(selectors) do
      if type(s) == 'table' then
        have_tables = true
      elseif type(s) == 'userdata' then
        have_userdata = true
      end
    end

    if not have_tables then
      if not have_userdata then
        return table.concat(selectors, delimiter)
      else
        return rspamd_text.fromtable(selectors, delimiter)
      end
    else
      -- We need to spill each list selector and combine it element-wise with
      -- the scalar ones, which are repeated for every row:
      -- e.g. s:tbl:s -> s:telt1:s + s:telt2:s ...
      -- Lists are zipped, so the number of rows is that of the shortest list.
      local tbl = {}
      local res = {}

      for i, s in ipairs(selectors) do
        if type(s) == 'table' then
          -- List elements may be userdata (e.g. rspamd_text), which
          -- table.concat below would reject, so stringify them first
          rawset(tbl, i, fun.map(tostring, s))
        else
          -- Scalars (strings, userdata) are repeated for each row
          rawset(tbl, i, fun.duplicate(tostring(s)))
        end
      end

      fun.each(function(...)
        table.insert(res, table.concat({...}, delimiter))
      end, fun.zip(lua_util.unpack(tbl)))

      return res
    end
  end
  --[[[
  -- @function lua_selectors.flatten_selectors(selectors)
  -- Convert selectors to a flat table of elements
  -- @param {table} selectors possibly nested list of strings and userdata
  -- @return {table} flat list of strings; userdata is converted with tostring
  --]]
  exports.flatten_selectors = function(selectors)
    local flat = {}

    -- Depth-first walk appending every leaf to `flat`
    local function walk(node)
      for _, item in ipairs(node) do
        local kind = type(item)

        if kind == 'string' then
          flat[#flat + 1] = item
        elseif kind == 'userdata' then
          flat[#flat + 1] = tostring(item)
        else
          walk(item)
        end
      end
    end

    walk(selectors)

    return flat
  end
  --[[[
  -- @function lua_selectors.create_selector_closure(cfg, selector_str, delimiter='', flatten=false)
  -- Parses the selector once and returns a function evaluating it for a task.
  -- @param cfg configuration object passed to `parse_selector`
  -- @param {string} selector_str selector expression
  -- @param {string} delimiter passed to `combine_selectors`
  -- @param {boolean} flatten use `flatten_selectors` instead of `combine_selectors`
  -- @return {function} `function(task)` returning the processed value or nil;
  -- nil is returned instead of a function when the selector cannot be parsed
  --]]
  exports.create_selector_closure = function(cfg, selector_str, delimiter, flatten)
    local parsed_selector = exports.parse_selector(cfg, selector_str)

    if not parsed_selector then
      return nil
    end

    return function(task)
      local extracted = exports.process_selectors(task, parsed_selector)

      if not extracted then
        return nil
      end

      if flatten then
        return exports.flatten_selectors(extracted)
      end

      return exports.combine_selectors(nil, extracted, delimiter)
    end
  end
  -- Produces a printable copy of a registry table: every entry is kept under
  -- its name, but fields holding functions are dropped from the entry.
  local function display_selectors(tbl)
    local function is_data_field(_, field_value)
      return type(field_value) ~= 'function'
    end

    local function strip_functions(name, definition)
      return name, fun.tomap(fun.filter(is_data_field, definition))
    end

    return fun.tomap(fun.map(strip_functions, tbl))
  end
  --[[[
  -- @function lua_selectors.list_extractors()
  -- @return {table} registered extractors with function fields removed
  --]]
  exports.list_extractors = function()
    local listing = display_selectors(extractors)

    return listing
  end
  --[[[
  -- @function lua_selectors.list_transforms()
  -- @return {table} registered transforms with function fields removed
  --]]
  exports.list_transforms = function()
    local listing = display_selectors(transform_function)

    return listing
  end
  477. return exports