You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_maps.lua 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589
  1. --[[[
  2. -- @module lua_maps
  3. -- This module contains helper functions for managing rspamd maps
  4. --]]
  5. --[[
  6. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  7. Licensed under the Apache License, Version 2.0 (the "License");
  8. you may not use this file except in compliance with the License.
  9. You may obtain a copy of the License at
  10. http://www.apache.org/licenses/LICENSE-2.0
  11. Unless required by applicable law or agreed to in writing, software
  12. distributed under the License is distributed on an "AS IS" BASIS,
  13. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. See the License for the specific language governing permissions and
  15. limitations under the License.
  16. ]]--
  17. local rspamd_logger = require "rspamd_logger"
  18. local ts = require("tableshape").types
  19. local lua_util = require "lua_util"
  20. local exports = {}
  21. local maps_cache = {}
  --- Computes the cache key used for maps that are defined by a single string.
  -- The map source and then the map type are fed, in that order, into an
  -- `xxh64` hasher from `rspamd_cryptobox_hash`.
  -- @param {string} data map source (as passed to the map constructor)
  -- @param {string} mtype map type
  -- @return whatever the hasher's `hex()` method yields for the two inputs
  local function map_hash_key(data, mtype)
    local cryptobox_hash = require "rspamd_cryptobox_hash"
    local hasher = cryptobox_hash.create_specific('xxh64')

    -- Order matters: source first, type second
    hasher:update(data)
    hasher:update(mtype)

    return hasher:hex()
  end
  --- Tells whether `where` begins with `st`.
  -- @param {string} where string to inspect
  -- @param {string} st expected prefix
  -- @return {boolean} true when the leading `string.len(st)` bytes of `where` equal `st`
  local function starts(where, st)
    local prefix_len = string.len(st)
    local head = string.sub(where, 1, prefix_len)

    return head == st
  end
  --- Drops the first `#st` bytes of `where`.
  -- No check is made that `where` actually starts with `st`; only the length
  -- of `st` is used.
  -- @param {string} where string to shorten
  -- @param {string} st prefix whose length is removed
  -- @return {string} the remainder of `where`
  local function cut_prefix(where, st)
    local first_kept = #st + 1

    return string.sub(where, first_kept)
  end
  -- Known `<prefix>` -> map type pairs, tried in this order. The prefix is
  -- removed from the source when it matches (so `cdb://path` keeps one
  -- leading slash because only `cdb:/` is cut).
  local known_type_prefixes = {
    {'regexp;', 'regexp'},
    {'re;', 'regexp'},
    {'regexp_multi;', 'regexp_multi'},
    {'re_multi;', 'regexp_multi'},
    {'glob;', 'glob'},
    {'glob_multi;', 'glob_multi'},
    {'radix;', 'radix'},
    {'ipnet;', 'radix'},
    {'set;', 'set'},
    {'hash;', 'hash'},
    {'plain;', 'hash'},
    {'cdb;', 'cdb'},
    {'cdb:/', 'cdb'},
  }

  --- Detects an explicit type prefix in a map source string.
  -- When `data` starts with one of the known prefixes, the prefix is removed
  -- and the type implied by it replaces `mtype`.
  -- @param {string} data map source, possibly carrying a type prefix
  -- @param {string} mtype type requested by the caller
  -- @return {string} data with the prefix (if any) removed
  -- @return {string} the effective map type
  local function maybe_adjust_type(data, mtype)
    -- Callback maps are never re-typed. Both values have to be returned here:
    -- callers unpack the result as `data, mtype`, so returning the type alone
    -- would hand them the type as the source and `nil` as the type.
    if mtype == 'callback' then
      return data, mtype
    end

    for _, entry in ipairs(known_type_prefixes) do
      local prefix, implied_type = entry[1], entry[2]
      if starts(data, prefix) then
        return cut_prefix(data, prefix), implied_type
      end
    end

    -- No known prefix: nothing to change
    return data, mtype
  end
  -- Shape of an external (HTTP backend) map definition
  local external_map_schema = ts.shape{
    -- marker field, must be exactly `true`
    external = ts.equivalent(true),
    -- where to get data from (required)
    backend = ts.string,
    -- how the lookup key is passed to the backend
    method = ts.one_of{"body", "header", "query"},
    -- how to encode the input, if relevant (optional)
    encode = ts.one_of{"json", "messagepack"}:is_optional(),
    -- optional; a number, or a string run through lua_util.parse_time_interval
    timeout = (ts.number + ts.string / lua_util.parse_time_interval):is_optional(),
  }
  77. local rspamd_http = require "rspamd_http"
  78. local ucl = require "ucl"
  --- Encodes a string for use inside a URL query component.
  -- Every byte except alphanumerics, space, `_`, `%`, `-`, `.` and `~` is
  -- replaced by its `%XX` form; spaces are then turned into `+`. Note that a
  -- literal `%` in the input is passed through unchanged.
  -- @param {string} str raw value
  -- @return {string} encoded value
  local function url_encode_string(str)
    local function percent_escape(ch)
      return string.format("%%%02X", string.byte(ch))
    end

    -- gsub also returns a match count; only the resulting string is kept
    local escaped = string.gsub(str, "([^%w _%%%-%.~])", percent_escape)
    local encoded = string.gsub(escaped, " ", "+")

    return encoded
  end
  85. assert(url_encode_string('上海+中國') == '%E4%B8%8A%E6%B5%B7%2B%E4%B8%AD%E5%9C%8B')
  86. assert(url_encode_string('? and the Mysterians') == '%3F+and+the+Mysterians')
  --- Performs one lookup against an external (HTTP backend) map.
  -- The key is delivered according to `map_config.method`:
  --   * `body`   - sent as the POST body: as is (text/plain) for string or
  --                userdata keys, or serialised as selected by
  --                `map_config.encode` (`json` / `messagepack`) for table keys
  --   * `header` - sent as request headers (replacing the default ones)
  --   * `query`  - appended to the backend URL as a query string
  -- The outcome is reported through `callback(ok, data_or_err, code, task)`.
  -- @param {table} map_config external map definition (`backend`, `method`, `encode`, `timeout`)
  -- @param upstreams object providing `get_upstream_round_robin()`
  -- @param {string|userdata|table} key lookup key
  -- @param {function} callback result handler, see above
  -- @param task task passed to the HTTP request and handed back to `callback`
  local function query_external_map(map_config, upstreams, key, callback, task)
    -- NOTE(review): 'form' is not among the methods accepted by
    -- external_map_schema; the check is kept for compatibility
    local http_method = (map_config.method == 'body' or map_config.method == 'form') and 'POST' or 'GET'
    local upstream = upstreams:get_upstream_round_robin()
    local http_headers = {
      ['Accept'] = '*/*'
    }
    local http_body
    local url = map_config.backend

    if type(key) == 'string' or type(key) == 'userdata' then
      if map_config.method == 'body' then
        http_body = key
        http_headers['Content-Type'] = 'text/plain'
      elseif map_config.method == 'header' then
        http_headers = {
          key = key
        }
      elseif map_config.method == 'query' then
        url = string.format('%s?key=%s', url, url_encode_string(tostring(key)))
      end
    elseif type(key) == 'table' then
      if map_config.method == 'body' then
        if map_config.encode == 'json' then
          http_body = ucl.to_format(key, 'json-compact', true)
          http_headers['Content-Type'] = 'application/json'
        elseif map_config.encode == 'messagepack' then
          http_body = ucl.to_format(key, 'messagepack', true)
          http_headers['Content-Type'] = 'application/msgpack'
        else
          local caller = debug.getinfo(2) or {}
          rspamd_logger.errx(task,
              "requested external map key with a wrong combination body method and missing encode; caller: %s:%s",
              caller.short_src, caller.currentline)
          callback(false, 'invalid map usage', 500, task)
          -- Stop here: without this the request below would still be sent
          -- and `callback` would be invoked a second time with its result
          return
        end
      else
        -- query/header and no encode
        if map_config.method == 'query' then
          local params_table = {}
          -- Only string values are passed on; other value types are skipped
          for k, v in pairs(key) do
            if type(v) == 'string' then
              table.insert(params_table, string.format('%s=%s', url_encode_string(k), url_encode_string(v)))
            end
          end
          url = string.format('%s?%s', url, table.concat(params_table, '&'))
        elseif map_config.method == 'header' then
          http_headers = key
        else
          local caller = debug.getinfo(2) or {}
          rspamd_logger.errx(task,
              "requested external map key with a wrong combination of encode and input; caller: %s:%s",
              caller.short_src, caller.currentline)
          callback(false, 'invalid map usage', 500, task)
          return
        end
      end
    end

    -- Only an error-free reply with code 200 counts as a hit; everything
    -- else is forwarded as a failure together with the reported code
    local function map_callback(err, code, body, _)
      if not err and code == 200 then
        callback(true, body, 200, task)
      else
        callback(false, err, code, task)
      end
    end

    local ret = rspamd_http.request{
      task = task,
      url = url,
      callback = map_callback,
      timeout = map_config.timeout or 1.0,
      keepalive = true,
      upstream = upstream,
      method = http_method,
      headers = http_headers,
      body = http_body,
    }

    if not ret then
      callback(false, 'http request error', 500, task)
    end
  end
  --[[[
  -- @function lua_maps.map_add_from_ucl(opt, mtype, description, callback)
  -- Creates a map from a definition: a single source string, a list of
  -- sources or inline elements, an external (HTTP backend) map description,
  -- or any other map object that is handed over to `rspamd_config:add_map`
  -- Returns a map wrapper (a table with a `get_key` method) or nil
  -- @param {string or table} opt data for map (or URL)
  -- @param {string} mtype type of map (`set`, `map`, `radix`, `regexp`)
  -- @param {string} description human-readable description of map
  -- @param {function} callback optional callback that will be called on map match (required for external maps)
  -- @return {table} map wrapper on success, or `nil`
  --]]
  local function rspamd_map_add_from_ucl(opt, mtype, description, callback)
    if not opt then
      return nil
    end

    local ret = {
      -- Looks up `k`. With a callback (per call or the one given at creation)
      -- the result is delivered as cb(ok, data, code, task) and nil is
      -- returned; without one the lookup result is returned directly.
      get_key = function(t, k, key_callback, task)
        if not t.__data then
          return nil
        end

        local cb = key_callback or callback

        if t.__external then
          -- External lookups need both a callback and a task
          if not cb or not task then
            local caller = debug.getinfo(2) or {}
            rspamd_logger.errx(rspamd_config, "requested external map key without callback or task; caller: %s:%s",
                caller.short_src, caller.currentline)
            return nil
          end
          query_external_map(t.__data, t.__upstreams, k, cb, task)
          return nil
        end

        local result = t.__data:get_key(k)
        if not cb then
          return result
        end

        if result then
          cb(true, result, 200, task)
        else
          cb(false, 'not found', 404, task)
        end
        return nil
      end
    }

    local ret_mt = {
      -- NOTE(review): Lua calls `__index` with (t, k) only, and `get_key` is
      -- invoked here without `t` as its receiver, so `k` ends up in the
      -- receiver position. Deliberately left unchanged: making this a real
      -- lookup would alter what reading any absent field of the wrapper
      -- yields - confirm the intended behaviour before touching it.
      __index = function(t, k, key_callback, task)
        if t.__data then
          return t.get_key(k, key_callback, task)
        end
        return nil
      end
    }

    -- Registers the wrapper in lua_selectors when the definition asks for it
    local function maybe_register_selector()
      if opt.selector_alias then
        local lua_selectors = require "lua_selectors"
        lua_selectors.add_map(opt.selector_alias, ret)
      end
    end

    -- Asks the config object for a map of the current `mtype`
    local function add_native_map(url)
      return rspamd_config:add_map{
        type = mtype,
        description = description,
        url = url,
      }
    end

    -- Binds a created map object to the wrapper, caches the wrapper under
    -- `cache_key` and registers the selector alias; yields nil when map
    -- creation has failed
    local function publish(map, cache_key)
      if not map then
        return nil
      end
      ret.__data = map
      setmetatable(ret, ret_mt)
      maps_cache[cache_key] = ret
      maybe_register_selector()
      return ret
    end

    -- An element that starts with `/<letter>` or `http` is treated as a
    -- source location rather than as inline data
    local function looks_like_location(src)
      return string.find(src, '^/%a') or string.find(src, '^http')
    end

    if type(opt) == 'string' then
      opt, mtype = maybe_adjust_type(opt, mtype)
      local cache_key = map_hash_key(opt, mtype)

      -- Cached maps are only reused when no creation-time callback is given
      if not callback and maps_cache[cache_key] then
        rspamd_logger.infox(rspamd_config, 'reuse url for %s(%s)',
            opt, mtype)
        return maps_cache[cache_key]
      end

      -- We have a single string, so we treat it as a map
      local map = add_native_map(opt)
      if not map then
        return nil
      end

      ret.__data = map
      ret.hash = cache_key
      setmetatable(ret, ret_mt)
      maps_cache[cache_key] = ret
      return ret
    end

    if type(opt) ~= 'table' then
      return nil
    end

    local cache_key = lua_util.table_digest(opt)
    if not callback and maps_cache[cache_key] then
      rspamd_logger.infox(rspamd_config, 'reuse url for complex map definition %s: %s',
          cache_key:sub(1,8), description)
      return maps_cache[cache_key]
    end

    if opt[1] then
      -- Array definition. Adjust each element if needed: prefixes are removed
      -- in place, and only the first detected type change replaces `mtype`
      local adjusted
      for i, source in ipairs(opt) do
        local nsrc, ntype = maybe_adjust_type(source, mtype)
        if mtype ~= ntype then
          if not adjusted then
            mtype = ntype
          end
          adjusted = true
        end
        opt[i] = nsrc
      end

      if mtype == 'radix' then
        if string.find(opt[1], '^%d') then
          -- First element starts with a digit: build the radix map from the
          -- elements themselves
          return publish(rspamd_config:radix_from_ucl(opt), cache_key)
        end
        -- Plain table
        return publish(add_native_map(opt), cache_key)
      end

      if looks_like_location(opt[1]) then
        -- Plain table
        return publish(add_native_map(opt), cache_key)
      end

      if mtype == 'regexp' or mtype == 'glob' then
        -- Inline expressions are passed as a static map
        return publish(add_native_map({
          url = 'static',
          data = opt,
        }), cache_key)
      end

      -- Plain array of keys: only string elements are taken into account
      local data = {}
      local nelts = 0
      for _, elt in ipairs(opt) do
        if type(elt) == 'string' then
          local key, value = elt, true
          if mtype == 'hash' then
            -- Treat as KV pair: the first space-separated piece is the key,
            -- the rest (re-joined with spaces) is the value
            local pieces = lua_util.str_split(elt, ' ')
            if #pieces > 1 then
              key = table.remove(pieces, 1)
              value = table.concat(pieces, ' ')
            end
          end
          data[key] = value
          nelts = nelts + 1
        end
      end

      if nelts == 0 then
        -- Empty map, huh?
        rspamd_logger.errx(rspamd_config, 'invalid map element: %s',
            opt)
        return nil
      end

      -- Plain Lua table that is used as a map; no metatable is attached and
      -- `get_key` becomes a direct table lookup
      ret.__data = data
      ret.get_key = function(t, k)
        if k ~= '__data' then
          return t.__data[k]
        end
        return nil
      end
      maps_cache[cache_key] = ret
      maybe_register_selector()
      return ret
    end

    if opt.external then
      -- External map definition, missing fields are handled by schema
      local parse_res, parse_err = external_map_schema(opt)
      if not parse_res then
        rspamd_logger.errx(rspamd_config, 'cannot parse external map: %s',
            parse_err)
        return nil
      end

      ret.__upstreams = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), opt.backend)
      if not ret.__upstreams then
        rspamd_logger.errx(rspamd_config, 'cannot parse external map upstreams: %s',
            opt.backend)
        return nil
      end

      -- External maps are not put into the cache
      ret.__data = opt
      ret.__external = true
      setmetatable(ret, ret_mt)
      maybe_register_selector()
      return ret
    end

    -- Adjust lua specific augmentations in a trivial case
    if type(opt.url) == 'string' then
      local nsrc, ntype = maybe_adjust_type(opt.url, mtype)
      if nsrc and ntype then
        opt.url = nsrc
        mtype = ntype
      end
    end

    -- We have some non-trivial object so let C code to deal with it somehow...
    return publish(add_native_map(opt), cache_key)
  end
  --[[[
  -- @function lua_maps.map_add(mname, optname, mtype, description, callback)
  -- Creates a map from a configuration option (static data or URL): the option
  -- `optname` of module `mname` is read from the config and handed over to
  -- `map_add_from_ucl`, whose result is returned
  -- @param {string} mname config section to use
  -- @param {string} optname option name to use
  -- @param {string} mtype type of map ('set', 'hash', 'radix', 'regexp', 'glob')
  -- @param {string} description human-readable description of map
  -- @param {function} callback optional callback that will be called on map match (required for external maps)
  -- @return result of `map_add_from_ucl` for that option
  --]]
  local function rspamd_map_add(mname, optname, mtype, description, callback)
    -- Not the last argument, so only the first result of get_module_opt is used
    return rspamd_map_add_from_ucl(
        rspamd_config:get_module_opt(mname, optname),
        mtype,
        description,
        callback)
  end
  439. exports.rspamd_map_add = rspamd_map_add
  440. exports.map_add = rspamd_map_add
  441. exports.rspamd_map_add_from_ucl = rspamd_map_add_from_ucl
  442. exports.map_add_from_ucl = rspamd_map_add_from_ucl
  --- Checks `what` against `key`, where `key` may name a registered map.
  -- * If `what` is a table, succeeds when any of its elements matches
  --   (checked recursively).
  -- * If `key` has the form `map://<name>` or `map:<name>` and the global
  --   `rspamd_maps` table has an entry `<name>`, the result is that map's
  --   `get_key(what)`.
  -- * Otherwise the lowercased `what` is compared with `key`.
  -- @param {string} key map reference or literal value
  -- @param {string|table} what value (or list of values) to check
  -- @return lookup result for map references, boolean otherwise
  local function rspamd_maybe_check_map(key, what)
    if type(what) == "table" then
      -- `fun` is needed for this branch only
      local fun = require "fun"
      return fun.any(function(elt) return rspamd_maybe_check_map(key, elt) end, what)
    end

    if type(rspamd_maps) == "table" then
      local mn
      -- The longer prefix has to be tried first: every `map://` key also
      -- starts with `map:`, which would leave `//` in front of the map name
      if starts(key, "map://") then
        mn = string.sub(key, 7)
      elseif starts(key, "map:") then
        mn = string.sub(key, 5)
      end

      if mn and rspamd_maps[mn] then
        return rspamd_maps[mn]:get_key(what)
      end
    end

    return what:lower() == key
  end
  462. exports.rspamd_maybe_check_map = rspamd_maybe_check_map
  --[[[
  -- @function lua_maps.fill_config_maps(mname, opts, map_defs)
  -- Replaces map definitions found in the module options with map objects.
  -- `map_defs` is a table indexed by option name; each value describes the
  -- map expected under that name, for example:
  map_defs = {
    my_map = {
      type = 'map',
      description = 'my cool map',
      optional = true,
    }
  }
  -- For every such entry the function looks at `opts.my_map`. If it is set,
  -- a map of the given type (default 'map') and description is created from
  -- it, stored back into `opts.my_map`, and the original value is kept in
  -- `opts.my_map_orig`. If it is not set, this is an error unless the entry
  -- is marked as `optional`.
  -- @param {string} mname module name (used in log messages only)
  -- @param {table} opts module options, modified in place
  -- @param {table} map_defs map descriptions, see above
  -- @return {bool} false as soon as a map cannot be created or a required
  -- option is missing (entries processed before that stay replaced), true otherwise
  --]]
  function exports.fill_config_maps(mname, opts, map_defs)
    assert(type(opts) == 'table')
    assert(type(map_defs) == 'table')

    for name, def in pairs(map_defs) do
      local source = opts[name]

      if source then
        local map = rspamd_map_add_from_ucl(source, def.type or 'map', def.description)
        if not map then
          rspamd_logger.errx(rspamd_config, 'map add error %s for module %s', name, mname)
          return false
        end
        -- Keep the raw definition next to the created map
        opts[name .. '_orig'] = source
        opts[name] = map
      elseif not def.optional then
        rspamd_logger.errx(rspamd_config, 'cannot find non optional map %s for module %s', name, mname)
        return false
      end
    end

    return true
  end
  -- A single string or a list of strings
  local string_or_string_list = ts.one_of{
    ts.string,
    ts.array_of(ts.string),
  }

  -- Map given as a complex object
  local direct_map_schema = ts.shape{
    name = ts.string:is_optional(),
    description = ts.string:is_optional(),
    -- an optional alias for the selectors framework
    selector_alias = ts.string:is_optional(),
    timeout = ts.number,
    data = ts.array_of(ts.string):is_optional(),
    -- Tableshape has no options support for something like key1 or key2?
    upstreams = string_or_string_list:is_optional(),
    url = string_or_string_list:is_optional(),
  }

  -- Any accepted way to define a map in the configuration
  exports.map_schema = ts.one_of{
    -- 'http://some_map'
    ts.string,
    -- ['foo', 'bar']
    ts.array_of(ts.string),
    -- complex object: either a direct or an external map definition
    ts.one_of{direct_map_schema, external_map_schema}
  }
  519. return exports