您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符


  1. --[[
  2. Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. -- This module contains 'selectors' implementation: code to extract data
  14. -- from Rspamd tasks and compose those together
  15. --
  16. -- Read more at https://rspamd.com/doc/configuration/selectors.html
  17. --[[[
  18. -- @module lua_selectors
  19. -- This module contains 'selectors' implementation: code to extract data
  20. -- from Rspamd tasks and compose those together.
  21. -- Typical selector looks like this: header(User).lower.substring(1, 2):ip
  22. --]]
  23. local exports = {
  24. maps = require "lua_selectors/maps"
  25. }
  26. local logger = require 'rspamd_logger'
  27. local fun = require 'fun'
  28. local lua_util = require "lua_util"
  29. local M = "selectors"
  30. local rspamd_text = require "rspamd_text"
  31. local unpack_function = table.unpack or unpack
  32. local E = {}
  33. local extractors = require "lua_selectors/extractors"
  34. local transform_function = require "lua_selectors/transforms"
  35. local text_cookie = rspamd_text.cookie
  -- Strips the `_list` suffix from a selector type name.
  -- Returns the element type (for example `string` for `string_list`) or nil
  -- when `ltype` does not end with `_list`.
  local function pure_type(ltype)
    local element_type = ltype:match('^(.*)_list$')
    return element_type
  end
  -- Converts a selector value to its string form.
  --
  -- @param t type name of the value (`table`, `string`, `text`, `nil`, ...)
  -- @param ud_or_table the value itself
  -- @return the converted value followed by the literal type name 'string',
  --   or nil when `t` is 'nil'
  local function implicit_tostring(t, ud_or_table)
    if t == 'nil' then
      return nil
    end

    if t == 'table' then
      -- Tables are special: prefer their `value` field, then `addr`, and only
      -- then fall back to the logger's rendering of the whole table
      local value = ud_or_table.value
      if value then
        return value, 'string'
      end

      local addr = ud_or_table.addr
      if addr then
        return addr, 'string'
      end

      return logger.slog("%s", ud_or_table), 'string'
    end

    local is_stringish = (t == 'string' or t == 'text')
    if is_stringish and type(ud_or_table) == 'userdata' then
      local cookie = ud_or_table.cookie
      if cookie and cookie == text_cookie then
        -- Userdata carrying the rspamd_text cookie is passed through as is
        return ud_or_table, 'string'
      end

      return tostring(ud_or_table), 'string'
    end

    -- Any other non-nil type is stringified with plain tostring
    return tostring(ud_or_table), 'string'
  end
  -- Runs a single parsed selector against `task`.
  --
  -- Steps, in order:
  --   1. call the extractor (`sel.selector.get_value`) to obtain a value and
  --      the name of its type;
  --   2. if the first pipeline element is a method, apply it (directly, or
  --      mapped over the elements of a `*_list` input); otherwise implicitly
  --      stringify `userdata*` / `table*` inputs;
  --   3. left-fold the remaining processors over the value;
  --   4. coerce the final value to `string` or `string_list`.
  --
  -- @param task passed to the extractor and used as the logging context
  -- @param sel selector entry with a `selector` table and an optional
  --   `processor_pipe` list
  -- @return the resulting value (a table for list results), or nil when
  --   nothing was extracted or the pipeline failed
  local function process_selector(task, sel)
    local function allowed_type(t)
      return t == 'string' or t == 'string_list'
    end

    local function list_type(t)
      return pure_type(t)
    end

    local input, etype = sel.selector.get_value(task, sel.selector.args)

    if not input then
      lua_util.debugm(M, task, 'no value extracted for %s', sel.selector.name)
      return nil
    end

    lua_util.debugm(M, task, 'extracted %s, type %s',
        sel.selector.name, etype)

    local pipe = sel.processor_pipe or E
    local first_elt = pipe[1]

    if first_elt and first_elt.method then
      -- Explicit conversion
      local meth = first_elt

      if meth.types[etype] then
        lua_util.debugm(M, task, 'apply method `%s` to %s',
            meth.name, etype)
        input, etype = meth.process(input, etype, meth.args)
      else
        local pt = pure_type(etype)

        if meth.types[pt] then
          lua_util.debugm(M, task, 'map method `%s` to list of %s',
              meth.name, pt)
          -- Map method to a list of inputs, excluding empty elements.
          -- The method arguments are forwarded to every call, exactly as in
          -- the scalar case above; without them the arguments given in the
          -- selector string would be silently ignored for list inputs.
          input = fun.filter(function(map_elt) return map_elt end,
              fun.map(function(list_elt)
                local ret, _ = meth.process(list_elt, pt, meth.args)
                return ret
              end, input))
          etype = 'string_list'
        end
      end

      -- Remove method from the pipeline
      pipe = fun.drop_n(1, pipe)
    elseif etype:match('^userdata') or etype:match('^table') then
      -- Implicit conversion
      local pt = pure_type(etype)

      if not pt then
        lua_util.debugm(M, task, 'apply implicit conversion %s->string', etype)
        input = implicit_tostring(etype, input)
        etype = 'string'
      else
        lua_util.debugm(M, task, 'apply implicit map %s->string', pt)
        input = fun.filter(function(map_elt) return map_elt end,
            fun.map(function(list_elt)
              local ret = implicit_tostring(pt, list_elt)
              return ret
            end, input))
        etype = 'string_list'
      end
    end

    -- Now we fold elements using left fold.
    -- The accumulator is a pair {value, type}; a nil accumulator (or a nil
    -- value) short-circuits all the remaining processors.
    local function fold_function(acc, x)
      if acc == nil or acc[1] == nil then
        lua_util.debugm(M, task, 'do not apply %s, accumulator is nil', x.name)
        return nil
      end

      local value = acc[1]
      local t = acc[2]

      if not x.types[t] then
        local pt = pure_type(t)

        if pt and x.types['list'] then
          -- Generic list processor
          lua_util.debugm(M, task, 'apply list function `%s` to %s', x.name, t)
          return {x.process(value, t, x.args)}
        elseif pt and x.map_type and x.types[pt] then
          local map_type = x.map_type .. '_list'
          lua_util.debugm(M, task, 'map `%s` to list of %s resulting %s',
              x.name, pt, map_type)
          -- Apply map, filtering empty values
          return {
            fun.filter(function(map_elt) return map_elt end,
                fun.map(function(list_elt)
                  if not list_elt then return nil end
                  local ret, _ = x.process(list_elt, pt, x.args)
                  return ret
                end, value)),
            map_type -- Returned type
          }
        end

        logger.errx(task, 'cannot apply transform %s for type %s', x.name, t)
        return nil
      end

      lua_util.debugm(M, task, 'apply %s to %s', x.name, t)
      return {x.process(value, t, x.args)}
    end

    local res = fun.foldl(fold_function,
        {input, etype},
        pipe)

    if not res or not res[1] then return nil end -- Pipeline failed

    if not allowed_type(res[2]) then
      -- Search for implicit conversion
      local pt = pure_type(res[2])

      if pt then
        lua_util.debugm(M, task, 'apply implicit map %s->string_list', pt)
        res[1] = fun.map(function(e) return implicit_tostring(pt, e) end, res[1])
        res[2] = 'string_list'
      else
        res[1] = implicit_tostring(res[2], res[1])
        res[2] = 'string'
      end
    end

    if list_type(res[2]) then
      -- Convert to table as it might have a functional form
      res[1] = fun.totable(res[1])
    end

    lua_util.debugm(M, task, 'final selector type: %s, value: %s', res[2], res[1])

    return res[1]
  end
  -- Builds the LPeg grammar that parses selector strings.
  --
  -- Shape of an expression: a function, optionally followed by `:method`,
  -- followed by any number of `.processor` elements; each of them may carry a
  -- parenthesised argument list. Several expressions can be chained with `;`
  -- or `*`.
  --
  -- Captures: one table per expression, holding one table per element. Each
  -- element table starts with the element name (method names are prefixed with
  -- `__`), followed by the argument list table when parentheses were present.
  local function make_grammar()
    local lpeg = require "lpeg"
    local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
    local C, Cf, Cg, Ct = lpeg.C, lpeg.Cf, lpeg.Cg, lpeg.Ct

    local spc = S(" \t\n")^0

    -- Multi-byte UTF-8 sequences are accepted inside atoms
    local cont = R("\128\191") -- continuation byte
    local utf8_high = R("\194\223") * cont
        + R("\224\239") * cont * cont
        + R("\240\244") * cont * cont * cont

    local atom = C((R("az", "AZ", "09") + S("_-") + utf8_high)^1)

    -- Quoted string delimited by `q`; a backslash escapes the next character
    -- and raw line breaks / form feeds are not allowed inside
    local function quoted(q)
      return P(q) * C(((1 - S(q .. "\r\n\f\\")) + (P('\\') * 1))^0) * q
    end

    local singlequoted_string = quoted("'")
    local doublequoted_string = quoted('"')
    local argument = atom + singlequoted_string + doublequoted_string

    local dot = P(".")
    local colon = P(":")
    local obrace = "(" * spc
    local ebrace = spc * ")"
    local tbl_obrace = "{" * spc
    local tbl_ebrace = spc * "}"
    local eqsign = spc * "=" * spc
    local comma = spc * "," * spc
    local sel_separator = spc * S";*" * spc

    -- Optional parenthesised argument list shared by all callable elements
    local call_args = (obrace * V("ARG_LIST") * ebrace)^0

    -- Marks a name as a method so that it can be told apart from processors
    local function as_method(name)
      return '__' .. name
    end

    return P{
      "LIST";
      LIST = Ct(V("EXPR")) * (sel_separator * Ct(V("EXPR")))^0,
      EXPR = V("FUNCTION") * (colon * V("METHOD"))^-1 * (dot * V("PROCESSOR"))^0,
      PROCESSOR = Ct(atom * spc * call_args),
      FUNCTION = Ct(atom * spc * call_args),
      METHOD = Ct((atom / as_method) * spc * call_args),
      ARG_LIST = Ct((V("ARG") * comma^0)^0),
      ARG = Cf(tbl_obrace * V("NAMED_ARG") * tbl_ebrace, rawset) + argument + V("LIST_ARGS"),
      NAMED_ARG = (Ct("") * Cg(argument * eqsign * (argument + V("LIST_ARGS")) * comma^0)^0),
      LIST_ARGS = Ct(tbl_obrace * V("LIST_ARG") * tbl_ebrace),
      LIST_ARG = Cg(argument * comma^0)^0,
    }
  end

  local parser = make_grammar()
  --[[[
  -- @function lua_selectors.parse_selector(cfg, str)
  -- Parses a selector string into a list of selector descriptions.
  -- Each description has a `selector` field (a copy of the registered extractor
  -- plus its `name` and `args`) and a `processor_pipe` list holding the
  -- optional method followed by the transform functions, in application order.
  -- @param cfg logging context
  -- @param str selector string
  -- @return list of selector descriptions, or nil if the string cannot be
  -- parsed, refers to an unknown extractor/processor, or has invalid arguments
  --]]
  exports.parse_selector = function(cfg, str)
    local parsed = {parser:match(str)}
    local output = {}

    if not parsed or not parsed[1] then return nil end

    -- Validates `args` in place against `schema` (when there is one).
    -- A schema with a metatable covers the whole argument list; a plain table
    -- is treated as a list of per-argument schemas.
    -- Returns false (after logging the reason) when validation fails.
    local function check_args(name, schema, args)
      if schema then
        if getmetatable(schema) then
          -- Schema covers all arguments
          local res, err = schema:transform(args)
          if not res then
            -- Log via `cfg` like the rest of this function rather than via
            -- the `rspamd_config` global
            logger.errx(cfg, 'invalid arguments for %s: %s', name, err)
            return false
          else
            for i, elt in ipairs(res) do
              args[i] = elt
            end
          end
        else
          for i, selt in ipairs(schema) do
            local res, err = selt:transform(args[i])

            if err then
              logger.errx(cfg, 'invalid arguments for %s: %s', name, err)
              return false
            else
              args[i] = res
            end
          end
        end
      end

      return true
    end

    -- Output AST format is the following:
    -- table of individual selectors
    -- each selector: list of functions
    -- each function: function name + optional list of arguments
    for _, sel in ipairs(parsed) do
      local res = {
        selector = {},
        processor_pipe = {},
      }

      local selector_tbl = sel[1]
      if not selector_tbl then
        logger.errx(cfg, 'no selector represented')
        return nil
      end
      if not extractors[selector_tbl[1]] then
        logger.errx(cfg, 'selector %s is unknown', selector_tbl[1])
        return nil
      end

      res.selector = lua_util.shallowcopy(extractors[selector_tbl[1]])
      res.selector.name = selector_tbl[1]
      res.selector.args = selector_tbl[2] or E

      if not check_args(res.selector.name,
          res.selector.args_schema,
          res.selector.args) then
        return nil
      end

      lua_util.debugm(M, cfg, 'processed selector %s, args: %s',
          res.selector.name, res.selector.args)

      local pipeline_error = false

      -- Now process processors pipe
      fun.each(function(proc_tbl)
        local proc_name = proc_tbl[1]

        if proc_name:match('^__') then
          -- Special case - method
          local method_name = proc_name:match('^__(.*)$')
          -- Check array indexing...
          if tonumber(method_name) then
            method_name = tonumber(method_name)
          end

          local processor = {
            name = tostring(method_name),
            method = true,
            args = proc_tbl[2] or E,
            types = {
              userdata = true,
              table = true,
              string = true,
            },
            map_type = 'string',
            process = function(inp, t, args)
              local ret
              if t == 'table' then
                -- Plain table field
                ret = inp[method_name]
              else
                -- We call method unpacking arguments and dropping all but the first result returned
                ret = (inp[method_name](inp, unpack_function(args or E)))
              end

              local ret_type = type(ret)

              if ret_type == 'nil' then return nil end
              -- Now apply types heuristic
              if ret_type == 'string' then
                return ret, 'string'
              elseif ret_type == 'table' then
                -- TODO: we need to ensure that 1) table is numeric 2) table has merely strings
                return ret, 'string_list'
              else
                return implicit_tostring(ret_type, ret)
              end
            end,
          }
          lua_util.debugm(M, cfg, 'attached method %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        else
          if not transform_function[proc_name] then
            logger.errx(cfg, 'processor %s is unknown', proc_name)
            pipeline_error = proc_name
            return nil
          end

          local processor = lua_util.shallowcopy(transform_function[proc_name])
          processor.name = proc_name
          processor.args = proc_tbl[2] or E

          if not check_args(processor.name, processor.args_schema, processor.args) then
            pipeline_error = 'args schema for ' .. proc_name
            return nil
          end

          lua_util.debugm(M, cfg, 'attached processor %s to selector %s, args: %s',
              proc_name, res.selector.name, processor.args)
          table.insert(res.processor_pipe, processor)
        end
      end, fun.tail(sel))

      if pipeline_error then
        logger.errx(cfg, 'unknown or invalid processor used: "%s", exiting', pipeline_error)
        return nil
      end

      table.insert(output, res)
    end

    return output
  end
  --[[[
  -- @function lua_selectors.register_extractor(cfg, name, selector)
  -- Registers `selector` as the extractor called `name`, replacing (with a
  -- warning) any extractor already registered under that name.
  -- @param cfg logging context
  -- @param name extractor name
  -- @param selector extractor definition; it must have a `get_value` field
  -- @return true on success, false if `selector` has no `get_value`
  --]]
  function exports.register_extractor(cfg, name, selector)
    if not selector.get_value then
      logger.errx(cfg, 'bad selector %s', name)
      return false
    end

    if extractors[name] then
      logger.warnx(cfg, 'redefining selector %s', name)
    end

    extractors[name] = selector
    return true
  end
  --[[[
  -- @function lua_selectors.register_transform(cfg, name, transform)
  -- Registers `transform` as the transform function called `name`, replacing
  -- (with a warning) any transform already registered under that name.
  -- @param cfg logging context
  -- @param name transform name
  -- @param transform transform definition; it must have both `process` and
  -- `types` fields
  -- @return true on success, false if a required field is missing
  --]]
  function exports.register_transform(cfg, name, transform)
    local is_complete = transform.process and transform.types

    if not is_complete then
      logger.errx(cfg, 'bad transform function %s', name)
      return false
    end

    if transform_function[name] then
      logger.warnx(cfg, 'redefining transform function %s', name)
    end

    transform_function[name] = transform
    return true
  end
  --[[[
  -- @function lua_selectors.process_selectors(task, selectors_pipe)
  -- Evaluates every selector of `selectors_pipe` against `task`.
  -- @param task task handed to each selector
  -- @param selectors_pipe list of parsed selectors
  -- @return list with one result per selector, or nil as soon as any selector
  -- yields no value
  --]]
  function exports.process_selectors(task, selectors_pipe)
    local results = {}

    for _, parsed_sel in ipairs(selectors_pipe) do
      local value = process_selector(task, parsed_sel)

      -- A single empty selector makes the whole result empty
      if not value then
        return nil
      end

      results[#results + 1] = value
    end

    return results
  end
  --[[[
  -- @function lua_selectors.combine_selectors(task, selectors, delimiter)
  -- Joins selector results using `delimiter` (empty string by default).
  -- Without table results the outcome is a single value: `table.concat` output
  -- for plain strings, or `rspamd_text.fromtable` output if any userdata is
  -- present. With table results the outcome is a table of joined strings.
  -- @param task unused
  -- @param selectors list of selector results (strings, userdata or tables)
  -- @param delimiter optional separator
  -- @return combined value, or nil when `selectors` is not given
  --]]
  exports.combine_selectors = function(_, selectors, delimiter)
    delimiter = delimiter or ''

    if not selectors then
      return nil
    end

    local seen_table, seen_userdata = false, false

    for _, item in ipairs(selectors) do
      local kind = type(item)

      if kind == 'table' then
        seen_table = true
      elseif kind == 'userdata' then
        seen_userdata = true
      end
    end

    if not seen_table then
      if seen_userdata then
        return rspamd_text.fromtable(selectors, delimiter)
      end

      return table.concat(selectors, delimiter)
    end

    -- Table results are spilled element by element: scalars are repeated for
    -- every element while list results are walked in parallel (zipped),
    -- e.g. s:tbl:s -> s:telt1:s + s:telt2:s ...
    local columns = {}

    for idx, item in ipairs(selectors) do
      local kind = type(item)

      if kind == 'string' then
        columns[idx] = fun.duplicate(item)
      elseif kind == 'userdata' then
        columns[idx] = fun.duplicate(tostring(item))
      else
        -- Raw table
        columns[idx] = fun.map(tostring, item)
      end
    end

    local combined = {}

    fun.each(function(...)
      combined[#combined + 1] = table.concat({...}, delimiter)
    end, fun.zip(lua_util.unpack(columns)))

    return combined
  end
  --[[[
  -- @function lua_selectors.flatten_selectors(selectors)
  -- Convert selectors to a flat table of elements
  -- Strings are copied as is, userdata is converted with `tostring` and any
  -- other element is walked recursively as a nested list.
  -- @param selectors (possibly nested) list of selector results
  -- @return flat list of strings
  --]]
  exports.flatten_selectors = function(selectors)
    local flat = {}

    local function collect(list)
      for _, item in ipairs(list) do
        local kind = type(item)

        if kind == 'string' then
          flat[#flat + 1] = item
        elseif kind == 'userdata' then
          flat[#flat + 1] = tostring(item)
        else
          collect(item)
        end
      end
    end

    collect(selectors)

    return flat
  end
  --[[[
  -- @function lua_selectors.create_selector_closure(cfg, selector_str, delimiter='', flatten=false)
  -- Parses `selector_str` once and returns a function that evaluates it.
  -- @param cfg logging context used while parsing
  -- @param selector_str selector string
  -- @param delimiter separator handed to `combine_selectors`
  -- @param flatten if true, results go through `flatten_selectors` instead of
  -- `combine_selectors`
  -- @return function(task) yielding the selector result (or nil when nothing
  -- was extracted); nil if the selector string cannot be parsed
  --]]
  function exports.create_selector_closure(cfg, selector_str, delimiter, flatten)
    local parsed = exports.parse_selector(cfg, selector_str)

    if not parsed then
      return nil
    end

    return function(task)
      local values = exports.process_selectors(task, parsed)

      if not values then
        return nil
      end

      if flatten then
        return exports.flatten_selectors(values)
      end

      return exports.combine_selectors(nil, values, delimiter)
    end
  end
  -- Produces a printable description of a registry table: every entry is
  -- reduced to a map of its non-function fields, keyed by the entry name.
  local function display_selectors(tbl)
    local function is_plain_field(_, field)
      return type(field) ~= 'function'
    end

    local function describe(name, definition)
      return name, fun.tomap(fun.filter(is_plain_field, definition))
    end

    return fun.tomap(fun.map(describe, tbl))
  end
  -- Returns the non-function fields of every registered extractor, keyed by
  -- extractor name.
  function exports.list_extractors()
    local described = display_selectors(extractors)
    return described
  end
  -- Returns the non-function fields of every registered transform function,
  -- keyed by transform name.
  function exports.list_transforms()
    local described = display_selectors(transform_function)
    return described
  end
  481. return exports