You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_maps.lua 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609
  1. --[[[
  2. -- @module lua_maps
  3. -- This module contains helper functions for managing rspamd maps
  4. --]]
  5. --[[
  6. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  7. Licensed under the Apache License, Version 2.0 (the "License");
  8. you may not use this file except in compliance with the License.
  9. You may obtain a copy of the License at
  10. http://www.apache.org/licenses/LICENSE-2.0
  11. Unless required by applicable law or agreed to in writing, software
  12. distributed under the License is distributed on an "AS IS" BASIS,
  13. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. See the License for the specific language governing permissions and
  15. limitations under the License.
  16. ]]--
  17. local rspamd_logger = require "rspamd_logger"
  18. local ts = require("tableshape").types
  19. local lua_util = require "lua_util"
  20. local exports = {}
  21. local maps_cache = {}
  22. local function map_hash_key(data, mtype)
  23. local hash = require "rspamd_cryptobox_hash"
  24. local st = hash.create_specific('xxh64')
  25. st:update(data)
  26. st:update(mtype)
  27. return st:hex()
  28. end
  29. local function starts(where,st)
  30. return string.sub(where,1,string.len(st))==st
  31. end
  32. local function cut_prefix(where,st)
  33. return string.sub(where,#st + 1)
  34. end
  35. local function maybe_adjust_type(data,mtype)
  36. local function check_prefix(prefix, t)
  37. if starts(data, prefix) then
  38. data = cut_prefix(data, prefix)
  39. mtype = t
  40. return true
  41. end
  42. return false
  43. end
  44. local known_types = {
  45. {'regexp;', 'regexp'},
  46. {'re;', 'regexp'},
  47. {'regexp_multi;', 'regexp_multi'},
  48. {'re_multi;', 'regexp_multi'},
  49. {'glob;', 'glob'},
  50. {'glob_multi;', 'glob_multi'},
  51. {'radix;', 'radix'},
  52. {'ipnet;', 'radix'},
  53. {'set;', 'set'},
  54. {'hash;', 'hash'},
  55. {'plain;', 'hash'},
  56. {'cdb;', 'cdb'},
  57. {'cdb:/', 'cdb'},
  58. }
  59. if mtype == 'callback' then
  60. return mtype
  61. end
  62. for _,t in ipairs(known_types) do
  63. if check_prefix(t[1], t[2]) then
  64. return data,mtype
  65. end
  66. end
  67. -- No change
  68. return data,mtype
  69. end
  70. local external_map_schema = ts.shape{
  71. external = ts.equivalent(true), -- must be true
  72. backend = ts.string, -- where to get data, required
  73. method = ts.one_of{"body", "header", "query"}, -- how to pass input
  74. encode = ts.one_of{"json", "messagepack"}:is_optional(), -- how to encode input (if relevant)
  75. timeout = (ts.number + ts.string / lua_util.parse_time_interval):is_optional(),
  76. }
  77. local rspamd_http = require "rspamd_http"
  78. local ucl = require "ucl"
  79. local function url_encode_string(str)
  80. str = string.gsub(str, "([^%w _%%%-%.~])",
  81. function(c) return string.format("%%%02X", string.byte(c)) end)
  82. str = string.gsub(str, " ", "+")
  83. return str
  84. end
  85. assert(url_encode_string('上海+中國') == '%E4%B8%8A%E6%B5%B7%2B%E4%B8%AD%E5%9C%8B')
  86. assert(url_encode_string('? and the Mysterians') == '%3F+and+the+Mysterians')
  87. local function query_external_map(map_config, upstreams, key, callback, task)
  88. local http_method = (map_config.method == 'body' or map_config.method == 'form') and 'POST' or 'GET'
  89. local upstream = upstreams:get_upstream_round_robin()
  90. local http_headers = {
  91. ['Accept'] = '*/*'
  92. }
  93. local http_body = nil
  94. local url = map_config.backend
  95. if type(key) == 'string' or type(key) == 'userdata' then
  96. if map_config.method == 'body' then
  97. http_body = key
  98. http_headers['Content-Type'] = 'text/plain'
  99. elseif map_config.method == 'header' then
  100. http_headers = {
  101. key = key
  102. }
  103. elseif map_config.method == 'query' then
  104. url = string.format('%s?key=%s', url, url_encode_string(tostring(key)))
  105. end
  106. elseif type(key) == 'table' then
  107. if map_config.method == 'body' then
  108. if map_config.encode == 'json' then
  109. http_body = ucl.to_format(key, 'json-compact', true)
  110. http_headers['Content-Type'] = 'application/json'
  111. elseif map_config.encode == 'messagepack' then
  112. http_body = ucl.to_format(key, 'messagepack', true)
  113. http_headers['Content-Type'] = 'application/msgpack'
  114. else
  115. local caller = debug.getinfo(2) or {}
  116. rspamd_logger.errx(task,
  117. "requested external map key with a wrong combination body method and missing encode; caller: %s:%s",
  118. caller.short_src, caller.currentline)
  119. callback(false, 'invalid map usage', 500, task)
  120. end
  121. else
  122. -- query/header and no encode
  123. if map_config.method == 'query' then
  124. local params_table = {}
  125. for k,v in pairs(key) do
  126. if type(v) == 'string' then
  127. table.insert(params_table, string.format('%s=%s', url_encode_string(k), url_encode_string(v)))
  128. end
  129. end
  130. url = string.format('%s?%s', url, table.concat(params_table, '&'))
  131. elseif map_config.method == 'header' then
  132. http_headers = key
  133. else
  134. local caller = debug.getinfo(2) or {}
  135. rspamd_logger.errx(task,
  136. "requested external map key with a wrong combination of encode and input; caller: %s:%s",
  137. caller.short_src, caller.currentline)
  138. callback(false, 'invalid map usage', 500, task)
  139. return
  140. end
  141. end
  142. end
  143. local function map_callback(err, code, body, _)
  144. if err then
  145. callback(false, err, code, task)
  146. elseif code == 200 then
  147. callback(true, body, 200, task)
  148. else
  149. callback(false, err, code, task)
  150. end
  151. end
  152. local ret = rspamd_http.request{
  153. task = task,
  154. url = url,
  155. callback = map_callback,
  156. timeout = map_config.timeout or 1.0,
  157. keepalive = true,
  158. upstream = upstream,
  159. method = http_method,
  160. headers = http_headers,
  161. body = http_body,
  162. }
  163. if not ret then
  164. callback(false, 'http request error', 500, task)
  165. end
  166. end
  167. --[[[
  168. -- @function lua_maps.map_add_from_ucl(opt, mtype, description)
  169. -- Creates a map from static data
  170. -- Returns true if map was added or nil
  171. -- @param {string or table} opt data for map (or URL)
  172. -- @param {string} mtype type of map (`set`, `map`, `radix`, `regexp`)
  173. -- @param {string} description human-readable description of map
  174. -- @param {function} callback optional callback that will be called on map match (required for external maps)
  175. -- @return {bool} true on success, or `nil`
  176. --]]
  177. local function rspamd_map_add_from_ucl(opt, mtype, description, callback)
  178. local ret = {
  179. get_key = function(t, k, key_callback, task)
  180. if t.__data then
  181. local cb = key_callback or callback
  182. if t.__external then
  183. if not cb or not task then
  184. local caller = debug.getinfo(2) or {}
  185. rspamd_logger.errx(rspamd_config, "requested external map key without callback or task; caller: %s:%s",
  186. caller.short_src, caller.currentline)
  187. return nil
  188. end
  189. query_external_map(t.__data, t.__upstreams, k, cb, task)
  190. else
  191. local result = t.__data:get_key(k)
  192. if cb then
  193. if result then
  194. cb(true, result, 200, task)
  195. else
  196. cb(false, 'not found', 404, task)
  197. end
  198. else
  199. return result
  200. end
  201. end
  202. end
  203. return nil
  204. end,
  205. foreach = function(t, cb)
  206. return t.__data:foreach(cb)
  207. end,
  208. on_load = function(t, cb)
  209. t.__data:on_load(cb)
  210. end
  211. }
  212. local ret_mt = {
  213. __index = function(t, k, key_callback, task)
  214. if t.__data then
  215. return t.get_key(k, key_callback, task)
  216. end
  217. return nil
  218. end
  219. }
  220. if not opt then
  221. return nil
  222. end
  223. local function maybe_register_selector()
  224. if opt.selector_alias then
  225. local lua_selectors = require "lua_selectors"
  226. lua_selectors.add_map(opt.selector_alias, ret)
  227. end
  228. end
  229. if type(opt) == 'string' then
  230. opt,mtype = maybe_adjust_type(opt, mtype)
  231. local cache_key = map_hash_key(opt, mtype)
  232. if not callback and maps_cache[cache_key] then
  233. rspamd_logger.infox(rspamd_config, 'reuse url for %s(%s)',
  234. opt, mtype)
  235. return maps_cache[cache_key]
  236. end
  237. -- We have a single string, so we treat it as a map
  238. local map = rspamd_config:add_map{
  239. type = mtype,
  240. description = description,
  241. url = opt,
  242. }
  243. if map then
  244. ret.__data = map
  245. ret.hash = cache_key
  246. setmetatable(ret, ret_mt)
  247. maps_cache[cache_key] = ret
  248. return ret
  249. end
  250. elseif type(opt) == 'table' then
  251. local cache_key = lua_util.table_digest(opt)
  252. if not callback and maps_cache[cache_key] then
  253. rspamd_logger.infox(rspamd_config, 'reuse url for complex map definition %s: %s',
  254. cache_key:sub(1,8), description)
  255. return maps_cache[cache_key]
  256. end
  257. if opt[1] then
  258. -- Adjust each element if needed
  259. local adjusted
  260. for i,source in ipairs(opt) do
  261. local nsrc,ntype = maybe_adjust_type(source, mtype)
  262. if mtype ~= ntype then
  263. if not adjusted then
  264. mtype = ntype
  265. end
  266. adjusted = true
  267. end
  268. opt[i] = nsrc
  269. end
  270. if mtype == 'radix' then
  271. if string.find(opt[1], '^%d') then
  272. local map = rspamd_config:radix_from_ucl(opt)
  273. if map then
  274. ret.__data = map
  275. setmetatable(ret, ret_mt)
  276. maps_cache[cache_key] = ret
  277. maybe_register_selector()
  278. return ret
  279. end
  280. else
  281. -- Plain table
  282. local map = rspamd_config:add_map{
  283. type = mtype,
  284. description = description,
  285. url = opt,
  286. }
  287. if map then
  288. ret.__data = map
  289. setmetatable(ret, ret_mt)
  290. maps_cache[cache_key] = ret
  291. maybe_register_selector()
  292. return ret
  293. end
  294. end
  295. elseif mtype == 'regexp' or mtype == 'glob' then
  296. if string.find(opt[1], '^/%a') or string.find(opt[1], '^http') then
  297. -- Plain table
  298. local map = rspamd_config:add_map{
  299. type = mtype,
  300. description = description,
  301. url = opt,
  302. }
  303. if map then
  304. ret.__data = map
  305. setmetatable(ret, ret_mt)
  306. maps_cache[cache_key] = ret
  307. maybe_register_selector()
  308. return ret
  309. end
  310. else
  311. local map = rspamd_config:add_map{
  312. type = mtype,
  313. description = description,
  314. url = {
  315. url = 'static',
  316. data = opt,
  317. }
  318. }
  319. if map then
  320. ret.__data = map
  321. setmetatable(ret, ret_mt)
  322. maps_cache[cache_key] = ret
  323. maybe_register_selector()
  324. return ret
  325. end
  326. end
  327. else
  328. if string.find(opt[1], '^/%a') or string.find(opt[1], '^http') then
  329. -- Plain table
  330. local map = rspamd_config:add_map{
  331. type = mtype,
  332. description = description,
  333. url = opt,
  334. }
  335. if map then
  336. ret.__data = map
  337. setmetatable(ret, ret_mt)
  338. maps_cache[cache_key] = ret
  339. maybe_register_selector()
  340. return ret
  341. end
  342. else
  343. local data = {}
  344. local nelts = 0
  345. -- Plain array of keys, count merely numeric elts
  346. for _,elt in ipairs(opt) do
  347. if type(elt) == 'string' then
  348. -- Numeric table
  349. if mtype == 'hash' then
  350. -- Treat as KV pair
  351. local pieces = lua_util.str_split(elt, ' ')
  352. if #pieces > 1 then
  353. local key = table.remove(pieces, 1)
  354. data[key] = table.concat(pieces, ' ')
  355. else
  356. data[elt] = true
  357. end
  358. else
  359. data[elt] = true
  360. end
  361. nelts = nelts + 1
  362. end
  363. end
  364. if nelts > 0 then
  365. -- Plain Lua table that is used as a map
  366. ret.__data = data
  367. ret.get_key = function(t, k)
  368. if k ~= '__data' then
  369. return t.__data[k]
  370. end
  371. return nil
  372. end
  373. ret.foreach = function(_, func)
  374. for k,v in pairs(ret.__data) do
  375. if not func(k, v) then
  376. return false
  377. end
  378. end
  379. return true
  380. end
  381. ret.on_load = function(_, cb)
  382. rspamd_config:add_on_load(function(_, _, _)
  383. cb()
  384. end)
  385. end
  386. maps_cache[cache_key] = ret
  387. maybe_register_selector()
  388. return ret
  389. else
  390. -- Empty map, huh?
  391. rspamd_logger.errx(rspamd_config, 'invalid map element: %s',
  392. opt)
  393. end
  394. end
  395. end
  396. else
  397. if opt.external then
  398. -- External map definition, missing fields are handled by schema
  399. local parse_res,parse_err = external_map_schema(opt)
  400. if parse_res then
  401. ret.__upstreams = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), opt.backend)
  402. if ret.__upstreams then
  403. ret.__data = opt
  404. ret.__external = true
  405. setmetatable(ret, ret_mt)
  406. maybe_register_selector()
  407. return ret
  408. else
  409. rspamd_logger.errx(rspamd_config, 'cannot parse external map upstreams: %s',
  410. opt.backend)
  411. end
  412. else
  413. rspamd_logger.errx(rspamd_config, 'cannot parse external map: %s',
  414. parse_err)
  415. end
  416. else
  417. -- Adjust lua specific augmentations in a trivial case
  418. if type(opt.url) == 'string' then
  419. local nsrc,ntype = maybe_adjust_type(opt.url, mtype)
  420. if nsrc and ntype then
  421. opt.url = nsrc
  422. mtype = ntype
  423. end
  424. end
  425. -- We have some non-trivial object so let C code to deal with it somehow...
  426. local map = rspamd_config:add_map{
  427. type = mtype,
  428. description = description,
  429. url = opt,
  430. }
  431. if map then
  432. ret.__data = map
  433. setmetatable(ret, ret_mt)
  434. maps_cache[cache_key] = ret
  435. maybe_register_selector()
  436. return ret
  437. end
  438. end
  439. end -- opt[1]
  440. end
  441. return nil
  442. end
  443. --[[[
  444. -- @function lua_maps.map_add(mname, optname, mtype, description)
  445. -- Creates a map from configuration elements (static data or URL)
  446. -- Returns true if map was added or nil
  447. -- @param {string} mname config section to use
  448. -- @param {string} optname option name to use
  449. -- @param {string} mtype type of map ('set', 'hash', 'radix', 'regexp', 'glob')
  450. -- @param {string} description human-readable description of map
  451. -- @param {function} callback optional callback that will be called on map match (required for external maps)
  452. -- @return {bool} true on success, or `nil`
  453. --]]
  454. local function rspamd_map_add(mname, optname, mtype, description, callback)
  455. local opt = rspamd_config:get_module_opt(mname, optname)
  456. return rspamd_map_add_from_ucl(opt, mtype, description, callback)
  457. end
  458. exports.rspamd_map_add = rspamd_map_add
  459. exports.map_add = rspamd_map_add
  460. exports.rspamd_map_add_from_ucl = rspamd_map_add_from_ucl
  461. exports.map_add_from_ucl = rspamd_map_add_from_ucl
  462. -- Check `what` for being lua_map name, otherwise just compares key with what
  463. local function rspamd_maybe_check_map(key, what)
  464. local fun = require "fun"
  465. if type(what) == "table" then
  466. return fun.any(function(elt) return rspamd_maybe_check_map(key, elt) end, what)
  467. end
  468. if type(rspamd_maps) == "table" then
  469. local mn
  470. if starts(key, "map:") then
  471. mn = string.sub(key, 5)
  472. elseif starts(key, "map://") then
  473. mn = string.sub(key, 7)
  474. end
  475. if mn and rspamd_maps[mn] then
  476. return rspamd_maps[mn]:get_key(what)
  477. end
  478. end
  479. return what:lower() == key
  480. end
  481. exports.rspamd_maybe_check_map = rspamd_maybe_check_map
  482. --[[[
  483. -- @function lua_maps.fill_config_maps(mname, options, defs)
  484. -- Fill maps that could be defined in defs, from the config in the options
  485. -- Defs is a table indexed by a map's parameter name and defining it's config,
  486. -- for example:
  487. defs = {
  488. my_map = {
  489. type = 'map',
  490. description = 'my cool map',
  491. optional = true,
  492. }
  493. }
  494. -- Then this function will look for opts.my_map parameter and try to replace it's with
  495. -- a map with the specific type, description but not failing if it was empty.
  496. -- It will also set options.my_map_orig to the original value defined in the map
  497. --]]
  498. exports.fill_config_maps = function(mname, opts, map_defs)
  499. assert(type(opts) == 'table')
  500. assert(type(map_defs) == 'table')
  501. for k, v in pairs(map_defs) do
  502. if opts[k] then
  503. local map = rspamd_map_add_from_ucl(opts[k], v.type or 'map', v.description)
  504. if not map then
  505. rspamd_logger.errx(rspamd_config, 'map add error %s for module %s', k, mname)
  506. return false
  507. end
  508. opts[k..'_orig'] = opts[k]
  509. opts[k] = map
  510. elseif not v.optional then
  511. rspamd_logger.errx(rspamd_config, 'cannot find non optional map %s for module %s', k, mname)
  512. return false
  513. end
  514. end
  515. return true
  516. end
  517. local direct_map_schema = ts.shape{ -- complex object
  518. name = ts.string:is_optional(),
  519. description = ts.string:is_optional(),
  520. selector_alias = ts.string:is_optional(), -- an optional alias for the selectos framework
  521. timeout = ts.number,
  522. data = ts.array_of(ts.string):is_optional(),
  523. -- Tableshape has no options support for something like key1 or key2?
  524. upstreams = ts.one_of{
  525. ts.string,
  526. ts.array_of(ts.string),
  527. }:is_optional(),
  528. url = ts.one_of{
  529. ts.string,
  530. ts.array_of(ts.string),
  531. }:is_optional(),
  532. }
  533. exports.map_schema = ts.one_of{
  534. ts.string, -- 'http://some_map'
  535. ts.array_of(ts.string), -- ['foo', 'bar']
  536. ts.one_of{direct_map_schema, external_map_schema}
  537. }
  538. return exports