You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_maps.lua 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619
  1. --[[[
  2. -- @module lua_maps
  3. -- This module contains helper functions for managing rspamd maps
  4. --]]
  5. --[[
  6. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  7. Licensed under the Apache License, Version 2.0 (the "License");
  8. you may not use this file except in compliance with the License.
  9. You may obtain a copy of the License at
  10. http://www.apache.org/licenses/LICENSE-2.0
  11. Unless required by applicable law or agreed to in writing, software
  12. distributed under the License is distributed on an "AS IS" BASIS,
  13. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. See the License for the specific language governing permissions and
  15. limitations under the License.
  16. ]]--
  17. local rspamd_logger = require "rspamd_logger"
  18. local ts = require("tableshape").types
  19. local lua_util = require "lua_util"
  -- Table of public functions and schemas; it is returned at the end of the module.
  20. local exports = {}
  -- Already created map wrappers, keyed by a digest of the map definition,
  -- so that identical definitions can share one wrapper.
  21. local maps_cache = {}
  --- Builds the cache key for a map that is defined by a single string.
  -- Feeds `data` and then `mtype` into an `xxh64` hasher and returns its hex digest.
  -- @param data map source (URL or path)
  -- @param mtype map type
  -- @return hex digest produced by the hasher
  local function map_hash_key(data, mtype)
    local cryptobox_hash = require "rspamd_cryptobox_hash"
    local hasher = cryptobox_hash.create_specific('xxh64')

    hasher:update(data)
    hasher:update(mtype)

    return hasher:hex()
  end
  --- Tells whether `where` begins with the prefix `st`.
  -- @param where string to inspect
  -- @param st expected prefix
  -- @return true when the leading `string.len(st)` bytes of `where` equal `st`
  local function starts(where, st)
    local prefix_len = string.len(st)
    local head = string.sub(where, 1, prefix_len)

    return head == st
  end
  --- Drops the first `#st` bytes of `where`.
  -- No check is made that `where` actually starts with `st`; only its length is used.
  -- @param where string to cut
  -- @param st prefix whose length determines how much is removed
  -- @return the remainder of `where`
  local function cut_prefix(where, st)
    local first_kept = #st + 1

    return string.sub(where, first_kept)
  end
  --- Detects a map type prefix (such as `regexp;` or `radix;`) in a map source
  -- string, strips it and reports the map type that the prefix selects.
  -- @param data map source; only strings can carry a prefix, any other value is
  --   returned untouched
  -- @param {string} mtype map type requested by the caller
  -- @return data (with the prefix removed if one was found)
  -- @return map type (the one selected by the prefix, otherwise `mtype`)
  local function maybe_adjust_type(data, mtype)
    -- { prefix, map type selected by that prefix }
    local known_types = {
      { 'regexp;', 'regexp' },
      { 're;', 'regexp' },
      { 'regexp_multi;', 'regexp_multi' },
      { 're_multi;', 'regexp_multi' },
      { 'glob;', 'glob' },
      { 'glob_multi;', 'glob_multi' },
      { 'radix;', 'radix' },
      { 'ipnet;', 'radix' },
      { 'set;', 'set' },
      { 'hash;', 'hash' },
      { 'plain;', 'hash' },
      { 'cdb;', 'cdb' },
      { 'cdb:/', 'cdb' },
    }

    -- Callback maps are never re-typed. Both values must be returned here:
    -- every caller unpacks the result as `data, mtype`, so returning only the
    -- type would replace the map source with the string 'callback'.
    if mtype == 'callback' or type(data) ~= 'string' then
      return data, mtype
    end

    for _, t in ipairs(known_types) do
      local prefix, prefixed_type = t[1], t[2]
      if starts(data, prefix) then
        return cut_prefix(data, prefix), prefixed_type
      end
    end

    -- No change
    return data, mtype
  end
  -- Schema of an external map definition: such a map is not loaded locally,
  -- its keys are looked up by sending HTTP requests to `backend`.
  70. local external_map_schema = ts.shape {
  71. external = ts.equivalent(true), -- must be true
  72. backend = ts.string, -- where to get data, required
  73. method = ts.one_of { "body", "header", "query" }, -- how to pass input
  74. encode = ts.one_of { "json", "messagepack" }:is_optional(), -- how to encode input (if relevant)
  -- optional; either a number or a string that goes through lua_util.parse_time_interval
  75. timeout = (ts.number + ts.string / lua_util.parse_time_interval):is_optional(),
  76. }
  77. local rspamd_http = require "rspamd_http"
  78. local ucl = require "ucl"
  --- Encodes a string for use in a URL query component.
  -- Every byte that is not matched by `%w` and is not one of space, `_`, `%`,
  -- `-`, `.` or `~` is replaced with its `%XX` (uppercase hex) escape; after
  -- that each space is turned into `+`.
  -- Note that a literal `%` in the input is passed through as is.
  -- @param {string} str text to encode
  -- @return {string} encoded text
  local function url_encode_string(str)
    local function escape_byte(ch)
      return string.format("%%%02X", string.byte(ch))
    end

    local escaped = string.gsub(str, "([^%w _%%%-%.~])", escape_byte)
    local plus_for_space = string.gsub(escaped, " ", "+")

    return plus_for_space
  end

  -- Sanity checks executed once, when the module is loaded
  assert(url_encode_string('上海+中國') == '%E4%B8%8A%E6%B5%B7%2B%E4%B8%AD%E5%9C%8B')
  assert(url_encode_string('? and the Mysterians') == '%3F+and+the+Mysterians')
  --- Looks up `key` in an external map by sending an HTTP request to its backend.
  -- How the key is transmitted depends on `map_config.method`:
  --   * `body`   - string/userdata keys are sent as a `text/plain` body, table
  --                keys are serialised according to `map_config.encode`
  --                (`json` or `messagepack`);
  --   * `header` - a string key is sent as the only header, named `key`; a table
  --                key is used as the complete header set;
  --   * `query`  - a string key is appended as `?key=<encoded>`; for a table key
  --                every string value is appended as `<name>=<value>`.
  -- The result is always delivered through `callback`, never returned.
  -- @param {table} map_config external map definition (`backend`, `method`,
  --   optional `encode` and `timeout`)
  -- @param upstreams object providing `get_upstream_round_robin()`
  -- @param {string|userdata|table} key key to look up
  -- @param {function} callback invoked as `callback(ok, body_or_error, http_code, task)`
  -- @param task task handed to the logger, the HTTP request and the callback
  local function query_external_map(map_config, upstreams, key, callback, task)
    local method = map_config.method
    local http_method = (method == 'body' or method == 'form') and 'POST' or 'GET'
    local upstream = upstreams:get_upstream_round_robin()
    local http_headers = {
      ['Accept'] = '*/*'
    }
    local http_body = nil
    local url = map_config.backend
    local key_type = type(key)

    if key_type == 'string' or key_type == 'userdata' then
      if method == 'body' then
        http_body = key
        http_headers['Content-Type'] = 'text/plain'
      elseif method == 'header' then
        -- Replaces the default header set
        http_headers = {
          key = key
        }
      elseif method == 'query' then
        url = string.format('%s?key=%s', url, url_encode_string(tostring(key)))
      end
    elseif key_type == 'table' then
      if method == 'body' then
        if map_config.encode == 'json' then
          http_body = ucl.to_format(key, 'json-compact', true)
          http_headers['Content-Type'] = 'application/json'
        elseif map_config.encode == 'messagepack' then
          http_body = ucl.to_format(key, 'messagepack', true)
          http_headers['Content-Type'] = 'application/msgpack'
        else
          local caller = debug.getinfo(2) or {}
          rspamd_logger.errx(task,
              "requested external map key with a wrong combination body method and missing encode; caller: %s:%s",
              caller.short_src, caller.currentline)
          callback(false, 'invalid map usage', 500, task)
          -- The failure has been reported: stop here, otherwise a request
          -- without a body is still sent and the callback fires a second time.
          return
        end
      elseif method == 'query' then
        -- query and no encode: only string values become parameters
        local params_table = {}
        for k, v in pairs(key) do
          if type(v) == 'string' then
            table.insert(params_table, string.format('%s=%s', url_encode_string(k), url_encode_string(v)))
          end
        end
        url = string.format('%s?%s', url, table.concat(params_table, '&'))
      elseif method == 'header' then
        -- The table is used as the complete header set
        http_headers = key
      else
        local caller = debug.getinfo(2) or {}
        rspamd_logger.errx(task,
            "requested external map key with a wrong combination of encode and input; caller: %s:%s",
            caller.short_src, caller.currentline)
        callback(false, 'invalid map usage', 500, task)
        return
      end
    end

    -- Only an error-free reply with HTTP code 200 counts as a hit
    local function map_callback(err, code, body, _)
      if not err and code == 200 then
        callback(true, body, 200, task)
      else
        callback(false, err, code, task)
      end
    end

    local ret = rspamd_http.request {
      task = task,
      url = url,
      callback = map_callback,
      timeout = map_config.timeout or 1.0,
      keepalive = true,
      upstream = upstream,
      method = http_method,
      headers = http_headers,
      body = http_body,
    }

    if not ret then
      callback(false, 'http request error', 500, task)
    end
  end
  --[[[
  -- @function lua_maps.map_add_from_ucl(opt, mtype, description, callback)
  -- Creates a map from static data, a map URL (or a list of them) or an external map definition
  -- Returns a map wrapper if the map was added or nil
  -- @param {string or table} opt data for map (or URL)
  -- @param {string} mtype type of map (`set`, `map`, `radix`, `regexp`)
  -- @param {string} description human-readable description of map
  -- @param {function} callback optional callback that will be called on map match (required for external maps)
  -- @return {table} wrapper providing `get_key`, `foreach` and `on_load` methods, or `nil` on failure
  --]]
  local function rspamd_map_add_from_ucl(opt, mtype, description, callback)
    if not opt then
      return nil
    end

    -- Wrapper handed back to the caller; `__data` holds the underlying map
    local ret = {
      -- Looks `k` up. If a callback is known (`key_callback` or the one given
      -- at creation time) the result is delivered through it and nil is
      -- returned; otherwise the lookup result is returned directly.
      -- External maps always need both a callback and a task.
      get_key = function(t, k, key_callback, task)
        if t.__data then
          local cb = key_callback or callback
          if t.__external then
            if not cb or not task then
              local caller = debug.getinfo(2) or {}
              rspamd_logger.errx(rspamd_config, "requested external map key without callback or task; caller: %s:%s",
                  caller.short_src, caller.currentline)
              return nil
            end
            query_external_map(t.__data, t.__upstreams, k, cb, task)
          else
            local result = t.__data:get_key(k)
            if cb then
              if result then
                cb(true, result, 200, task)
              else
                cb(false, 'not found', 404, task)
              end
            else
              return result
            end
          end
        end
        return nil
      end,
      foreach = function(t, cb)
        return t.__data:foreach(cb)
      end,
      on_load = function(t, cb)
        t.__data:on_load(cb)
      end
    }

    local ret_mt = {
      -- `map[key]` sugar: a plain synchronous lookup in the underlying map.
      -- Lua calls `__index` with (table, key) only, and only for fields that
      -- are absent from the wrapper itself.
      __index = function(t, k)
        -- Wrapper internals (`__data`, `__external`, `__upstreams`) are
        -- never map keys
        if type(k) == 'string' and string.sub(k, 1, 2) == '__' then
          return nil
        end
        local data = rawget(t, '__data')
        -- External maps cannot be queried synchronously
        if data == nil or rawget(t, '__external') then
          return nil
        end
        return data:get_key(k)
      end
    }

    -- Registers the wrapper in the selectors framework if an alias was requested
    local function maybe_register_selector()
      if type(opt) == 'table' and opt.selector_alias then
        local lua_selectors = require "lua_selectors"
        lua_selectors.add_map(opt.selector_alias, ret)
      end
    end

    -- Binds `map` to the wrapper, caches it under `cache_key` (when given) and
    -- registers the selector alias; yields nil when `map` was not created
    local function wrap_map(map, cache_key)
      if not map then
        return nil
      end
      ret.__data = map
      setmetatable(ret, ret_mt)
      if cache_key then
        maps_cache[cache_key] = ret
      end
      maybe_register_selector()
      return ret
    end

    -- Creates a map of the current `mtype` from `url` and wraps it
    local function add_and_wrap(url, cache_key)
      return wrap_map(rspamd_config:add_map {
        type = mtype,
        description = description,
        url = url,
      }, cache_key)
    end

    -- Turns the string elements of `opt` into a plain Lua table used as a map
    local function wrap_lua_table(cache_key)
      local data = {}
      local nelts = 0

      for _, elt in ipairs(opt) do
        if type(elt) == 'string' then
          local key, value = elt, true
          if mtype == 'hash' then
            -- Treat as KV pair: first word is the key, the rest is the value
            local pieces = lua_util.str_split(elt, ' ')
            if #pieces > 1 then
              key = table.remove(pieces, 1)
              value = table.concat(pieces, ' ')
            end
          end
          data[key] = value
          nelts = nelts + 1
        end
      end

      if nelts == 0 then
        -- Empty map, huh?
        rspamd_logger.errx(rspamd_config, 'invalid map element: %s',
            opt)
        return nil
      end

      ret.__data = data
      ret.get_key = function(t, k)
        if k ~= '__data' then
          return t.__data[k]
        end
        return nil
      end
      ret.foreach = function(_, func)
        for k, v in pairs(ret.__data) do
          if not func(k, v) then
            return false
          end
        end
        return true
      end
      ret.on_load = function(_, cb)
        rspamd_config:add_on_load(function(_, _, _)
          cb()
        end)
      end
      maps_cache[cache_key] = ret
      maybe_register_selector()
      return ret
    end

    if type(opt) == 'string' then
      opt, mtype = maybe_adjust_type(opt, mtype)
      local cache_key = map_hash_key(opt, mtype)
      if not callback and maps_cache[cache_key] then
        rspamd_logger.infox(rspamd_config, 'reuse url for %s(%s)',
            opt, mtype)
        return maps_cache[cache_key]
      end
      -- We have a single string, so we treat it as a map
      local map = rspamd_config:add_map {
        type = mtype,
        description = description,
        url = opt,
      }
      if map then
        ret.hash = cache_key
      end
      return wrap_map(map, cache_key)
    end

    if type(opt) ~= 'table' then
      return nil
    end

    local cache_key = lua_util.table_digest(opt)
    if not callback and maps_cache[cache_key] then
      rspamd_logger.infox(rspamd_config, 'reuse url for complex map definition %s: %s',
          cache_key:sub(1, 8), description)
      return maps_cache[cache_key]
    end

    if opt[1] then
      -- Array: either a list of map sources or a list of literal elements
      local function check_plain_map(line)
        return lua_util.str_startswith(line, 'http')
            or lua_util.str_startswith(line, 'file:')
            or lua_util.str_startswith(line, '/')
      end

      -- Strip type prefixes from every element; the first element that
      -- changes the type decides the type of the whole map
      local adjusted
      for i, source in ipairs(opt) do
        local nsrc, ntype = maybe_adjust_type(source, mtype)
        if mtype ~= ntype then
          if not adjusted then
            mtype = ntype
          end
          adjusted = true
        end
        opt[i] = nsrc
      end

      if mtype == 'radix' then
        if string.find(opt[1], '^%d') then
          -- List of numeric stuff (hope it's ipnets definitions)
          return wrap_map(rspamd_config:radix_from_ucl(opt), cache_key)
        end
        -- List of map sources
        return add_and_wrap(opt, cache_key)
      end

      if check_plain_map(opt[1]) then
        -- List of map sources
        return add_and_wrap(opt, cache_key)
      end

      if mtype == 'regexp' or mtype == 'glob' then
        -- Literal expressions: pass them as static data
        return add_and_wrap({
          url = 'static',
          data = opt,
        }, cache_key)
      end

      -- Plain array of keys
      return wrap_lua_table(cache_key)
    end

    if opt.external then
      -- External map definition, missing fields are handled by schema
      local parse_res, parse_err = external_map_schema(opt)
      if not parse_res then
        rspamd_logger.errx(rspamd_config, 'cannot parse external map: %s',
            parse_err)
        return nil
      end

      ret.__upstreams = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), opt.backend)
      if not ret.__upstreams then
        rspamd_logger.errx(rspamd_config, 'cannot parse external map upstreams: %s',
            opt.backend)
        return nil
      end

      ret.__external = true
      -- External maps are not cached
      return wrap_map(opt, nil)
    end

    -- Adjust lua specific augmentations in a trivial case
    if type(opt.url) == 'string' then
      local nsrc, ntype = maybe_adjust_type(opt.url, mtype)
      if nsrc and ntype then
        opt.url = nsrc
        mtype = ntype
      end
    end

    -- We have some non-trivial object so let C code to deal with it somehow...
    return add_and_wrap(opt, cache_key)
  end
  --[[[
  -- @function lua_maps.map_add(mname, optname, mtype, description, callback)
  -- Creates a map from a configuration element (static data or URL): reads the
  -- option `optname` of the module `mname` and passes it to `map_add_from_ucl`
  -- @param {string} mname config section to use
  -- @param {string} optname option name to use
  -- @param {string} mtype type of map ('set', 'hash', 'radix', 'regexp', 'glob')
  -- @param {string} description human-readable description of map
  -- @param {function} callback optional callback that will be called on map match (required for external maps)
  -- @return whatever `map_add_from_ucl` returns for that option
  --]]
  local function rspamd_map_add(mname, optname, mtype, description, callback)
    return rspamd_map_add_from_ucl(
        rspamd_config:get_module_opt(mname, optname),
        mtype,
        description,
        callback)
  end
  -- Each function is exported twice: under its `rspamd_` prefixed name and
  -- under a short alias.
  467. exports.rspamd_map_add = rspamd_map_add
  468. exports.map_add = rspamd_map_add
  469. exports.rspamd_map_add_from_ucl = rspamd_map_add_from_ucl
  470. exports.map_add_from_ucl = rspamd_map_add_from_ucl
  --- Checks `what` against `key`, where `key` may name a map.
  -- * If `what` is a table, the check is applied to its elements and succeeds
  --   when any of them matches.
  -- * If the global `rspamd_maps` table exists and `key` has the form
  --   `map://name` or `map:name` with `name` present in `rspamd_maps`, the
  --   result of that map's `get_key(what)` is returned.
  -- * Otherwise lowercased `what` is compared with `key`.
  -- @param key map reference or a literal value to compare with
  -- @param what value (or table of values) to check
  -- @return map lookup result, or a boolean for the literal comparison
  local function rspamd_maybe_check_map(key, what)
    if type(what) == "table" then
      -- Only needed for this branch, so load it lazily
      local fun = require "fun"
      return fun.any(function(elt)
        return rspamd_maybe_check_map(key, elt)
      end, what)
    end

    if type(key) == "string" and type(rspamd_maps) == "table" then
      local mn
      -- The longer prefix has to be tested first: every `map://` key also
      -- starts with `map:`
      if starts(key, "map://") then
        mn = string.sub(key, 7)
      elseif starts(key, "map:") then
        mn = string.sub(key, 5)
      end

      if mn and rspamd_maps[mn] then
        return rspamd_maps[mn]:get_key(what)
      end
    end

    -- Only strings can be lowercased; anything else cannot match literally
    if type(what) ~= "string" then
      return false
    end

    return what:lower() == key
  end
  -- Make the map-or-literal check available to other modules
  492. exports.rspamd_maybe_check_map = rspamd_maybe_check_map
  --[[[
  -- @function lua_maps.fill_config_maps(mname, options, defs)
  -- Replaces map definitions found in the module options with map objects.
  -- `defs` is a table indexed by the option name of a map; each value describes
  -- that map (`type`, `description`, `optional`).
  -- @example
  -- defs = {
  --   my_map = {
  --     type = 'map',
  --     description = 'my cool map',
  --     optional = true,
  --   }
  -- }
  -- -- With these defs the function looks for `options.my_map` and replaces it
  -- -- with a map of the given type and description; as the map is optional,
  -- -- a missing `options.my_map` is not an error.
  -- -- The value that was configured originally is kept in `options.my_map_orig`.
  -- @return {bool} false as soon as a map cannot be created or a non optional
  --   map is missing (maps processed before that stay replaced), true otherwise
  --]]
  exports.fill_config_maps = function(mname, opts, map_defs)
    assert(type(opts) == 'table')
    assert(type(map_defs) == 'table')

    for param, def in pairs(map_defs) do
      local configured = opts[param]

      if not configured then
        if not def.optional then
          rspamd_logger.errx(rspamd_config, 'cannot find non optional map %s for module %s', param, mname)
          return false
        end
      else
        local map = rspamd_map_add_from_ucl(configured, def.type or 'map', def.description)
        if not map then
          rspamd_logger.errx(rspamd_config, 'map add error %s for module %s', param, mname)
          return false
        end

        -- Keep the raw definition next to the created map
        opts[param .. '_orig'] = configured
        opts[param] = map
      end
    end

    return true
  end
  -- Schema of a map that is defined by a table with explicit fields.
  -- NOTE(review): `timeout` is the only field that is not marked optional, so a
  -- definition without it does not match this shape - confirm this is intended.
  528. local direct_map_schema = ts.shape { -- complex object
  529. name = ts.string:is_optional(),
  530. description = ts.string:is_optional(),
  531. selector_alias = ts.string:is_optional(), -- an optional alias for the selectors framework
  532. timeout = ts.number,
  533. data = ts.array_of(ts.string):is_optional(),
  534. -- Tableshape has no options support for something like key1 or key2?
  -- each of `upstreams` and `url` is either one string or an array of strings
  535. upstreams = ts.one_of {
  536. ts.string,
  537. ts.array_of(ts.string),
  538. } :is_optional(),
  539. url = ts.one_of {
  540. ts.string,
  541. ts.array_of(ts.string),
  542. } :is_optional(),
  543. }
  -- Public schema accepting any supported map definition: a single string,
  -- an array of strings, or a table matching the direct or the external shape.
  544. exports.map_schema = ts.one_of {
  545. ts.string, -- 'http://some_map'
  546. ts.array_of(ts.string), -- ['foo', 'bar']
  547. ts.one_of { direct_map_schema, external_map_schema }
  548. }
  -- Module result: the table of exported functions and schemas
  549. return exports