Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

lua_maps.lua 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612
  1. --[[[
  2. -- @module lua_maps
  3. -- This module contains helper functions for managing rspamd maps
  4. --]]
  5. --[[
  6. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  7. Licensed under the Apache License, Version 2.0 (the "License");
  8. you may not use this file except in compliance with the License.
  9. You may obtain a copy of the License at
  10. http://www.apache.org/licenses/LICENSE-2.0
  11. Unless required by applicable law or agreed to in writing, software
  12. distributed under the License is distributed on an "AS IS" BASIS,
  13. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. See the License for the specific language governing permissions and
  15. limitations under the License.
  16. ]]--
  17. local rspamd_logger = require "rspamd_logger"
  18. local ts = require("tableshape").types
  19. local lua_util = require "lua_util"
  20. local exports = {}
  21. local maps_cache = {}
  22. local function map_hash_key(data, mtype)
  23. local hash = require "rspamd_cryptobox_hash"
  24. local st = hash.create_specific('xxh64')
  25. st:update(data)
  26. st:update(mtype)
  27. return st:hex()
  28. end
  29. local function starts(where, st)
  30. return string.sub(where, 1, string.len(st)) == st
  31. end
  32. local function cut_prefix(where, st)
  33. return string.sub(where, #st + 1)
  34. end
  35. local function maybe_adjust_type(data, mtype)
  36. local function check_prefix(prefix, t)
  37. if starts(data, prefix) then
  38. data = cut_prefix(data, prefix)
  39. mtype = t
  40. return true
  41. end
  42. return false
  43. end
  44. local known_types = {
  45. { 'regexp;', 'regexp' },
  46. { 're;', 'regexp' },
  47. { 'regexp_multi;', 'regexp_multi' },
  48. { 're_multi;', 'regexp_multi' },
  49. { 'glob;', 'glob' },
  50. { 'glob_multi;', 'glob_multi' },
  51. { 'radix;', 'radix' },
  52. { 'ipnet;', 'radix' },
  53. { 'set;', 'set' },
  54. { 'hash;', 'hash' },
  55. { 'plain;', 'hash' },
  56. { 'cdb;', 'cdb' },
  57. { 'cdb:/', 'cdb' },
  58. }
  59. if mtype == 'callback' then
  60. return mtype
  61. end
  62. for _, t in ipairs(known_types) do
  63. if check_prefix(t[1], t[2]) then
  64. return data, mtype
  65. end
  66. end
  67. -- No change
  68. return data, mtype
  69. end
  70. local external_map_schema = ts.shape {
  71. external = ts.equivalent(true), -- must be true
  72. backend = ts.string, -- where to get data, required
  73. method = ts.one_of { "body", "header", "query" }, -- how to pass input
  74. encode = ts.one_of { "json", "messagepack" }:is_optional(), -- how to encode input (if relevant)
  75. timeout = (ts.number + ts.string / lua_util.parse_time_interval):is_optional(),
  76. }
  77. local rspamd_http = require "rspamd_http"
  78. local ucl = require "ucl"
  79. local function url_encode_string(str)
  80. str = string.gsub(str, "([^%w _%%%-%.~])",
  81. function(c)
  82. return string.format("%%%02X", string.byte(c))
  83. end)
  84. str = string.gsub(str, " ", "+")
  85. return str
  86. end
  87. assert(url_encode_string('上海+中國') == '%E4%B8%8A%E6%B5%B7%2B%E4%B8%AD%E5%9C%8B')
  88. assert(url_encode_string('? and the Mysterians') == '%3F+and+the+Mysterians')
  89. local function query_external_map(map_config, upstreams, key, callback, task)
  90. local http_method = (map_config.method == 'body' or map_config.method == 'form') and 'POST' or 'GET'
  91. local upstream = upstreams:get_upstream_round_robin()
  92. local http_headers = {
  93. ['Accept'] = '*/*'
  94. }
  95. local http_body = nil
  96. local url = map_config.backend
  97. if type(key) == 'string' or type(key) == 'userdata' then
  98. if map_config.method == 'body' then
  99. http_body = key
  100. http_headers['Content-Type'] = 'text/plain'
  101. elseif map_config.method == 'header' then
  102. http_headers = {
  103. key = key
  104. }
  105. elseif map_config.method == 'query' then
  106. url = string.format('%s?key=%s', url, url_encode_string(tostring(key)))
  107. end
  108. elseif type(key) == 'table' then
  109. if map_config.method == 'body' then
  110. if map_config.encode == 'json' then
  111. http_body = ucl.to_format(key, 'json-compact', true)
  112. http_headers['Content-Type'] = 'application/json'
  113. elseif map_config.encode == 'messagepack' then
  114. http_body = ucl.to_format(key, 'messagepack', true)
  115. http_headers['Content-Type'] = 'application/msgpack'
  116. else
  117. local caller = debug.getinfo(2) or {}
  118. rspamd_logger.errx(task,
  119. "requested external map key with a wrong combination body method and missing encode; caller: %s:%s",
  120. caller.short_src, caller.currentline)
  121. callback(false, 'invalid map usage', 500, task)
  122. end
  123. else
  124. -- query/header and no encode
  125. if map_config.method == 'query' then
  126. local params_table = {}
  127. for k, v in pairs(key) do
  128. if type(v) == 'string' then
  129. table.insert(params_table, string.format('%s=%s', url_encode_string(k), url_encode_string(v)))
  130. end
  131. end
  132. url = string.format('%s?%s', url, table.concat(params_table, '&'))
  133. elseif map_config.method == 'header' then
  134. http_headers = key
  135. else
  136. local caller = debug.getinfo(2) or {}
  137. rspamd_logger.errx(task,
  138. "requested external map key with a wrong combination of encode and input; caller: %s:%s",
  139. caller.short_src, caller.currentline)
  140. callback(false, 'invalid map usage', 500, task)
  141. return
  142. end
  143. end
  144. end
  145. local function map_callback(err, code, body, _)
  146. if err then
  147. callback(false, err, code, task)
  148. elseif code == 200 then
  149. callback(true, body, 200, task)
  150. else
  151. callback(false, err, code, task)
  152. end
  153. end
  154. local ret = rspamd_http.request {
  155. task = task,
  156. url = url,
  157. callback = map_callback,
  158. timeout = map_config.timeout or 1.0,
  159. keepalive = true,
  160. upstream = upstream,
  161. method = http_method,
  162. headers = http_headers,
  163. body = http_body,
  164. }
  165. if not ret then
  166. callback(false, 'http request error', 500, task)
  167. end
  168. end
  169. --[[[
  170. -- @function lua_maps.map_add_from_ucl(opt, mtype, description)
  171. -- Creates a map from static data
  172. -- Returns true if map was added or nil
  173. -- @param {string or table} opt data for map (or URL)
  174. -- @param {string} mtype type of map (`set`, `map`, `radix`, `regexp`)
  175. -- @param {string} description human-readable description of map
  176. -- @param {function} callback optional callback that will be called on map match (required for external maps)
  177. -- @return {bool} true on success, or `nil`
  178. --]]
  179. local function rspamd_map_add_from_ucl(opt, mtype, description, callback)
  180. local ret = {
  181. get_key = function(t, k, key_callback, task)
  182. if t.__data then
  183. local cb = key_callback or callback
  184. if t.__external then
  185. if not cb or not task then
  186. local caller = debug.getinfo(2) or {}
  187. rspamd_logger.errx(rspamd_config, "requested external map key without callback or task; caller: %s:%s",
  188. caller.short_src, caller.currentline)
  189. return nil
  190. end
  191. query_external_map(t.__data, t.__upstreams, k, cb, task)
  192. else
  193. local result = t.__data:get_key(k)
  194. if cb then
  195. if result then
  196. cb(true, result, 200, task)
  197. else
  198. cb(false, 'not found', 404, task)
  199. end
  200. else
  201. return result
  202. end
  203. end
  204. end
  205. return nil
  206. end,
  207. foreach = function(t, cb)
  208. return t.__data:foreach(cb)
  209. end,
  210. on_load = function(t, cb)
  211. t.__data:on_load(cb)
  212. end
  213. }
  214. local ret_mt = {
  215. __index = function(t, k, key_callback, task)
  216. if t.__data then
  217. return t.get_key(k, key_callback, task)
  218. end
  219. return nil
  220. end
  221. }
  222. if not opt then
  223. return nil
  224. end
  225. local function maybe_register_selector()
  226. if opt.selector_alias then
  227. local lua_selectors = require "lua_selectors"
  228. lua_selectors.add_map(opt.selector_alias, ret)
  229. end
  230. end
  231. if type(opt) == 'string' then
  232. opt, mtype = maybe_adjust_type(opt, mtype)
  233. local cache_key = map_hash_key(opt, mtype)
  234. if not callback and maps_cache[cache_key] then
  235. rspamd_logger.infox(rspamd_config, 'reuse url for %s(%s)',
  236. opt, mtype)
  237. return maps_cache[cache_key]
  238. end
  239. -- We have a single string, so we treat it as a map
  240. local map = rspamd_config:add_map {
  241. type = mtype,
  242. description = description,
  243. url = opt,
  244. }
  245. if map then
  246. ret.__data = map
  247. ret.hash = cache_key
  248. setmetatable(ret, ret_mt)
  249. maps_cache[cache_key] = ret
  250. return ret
  251. end
  252. elseif type(opt) == 'table' then
  253. local cache_key = lua_util.table_digest(opt)
  254. if not callback and maps_cache[cache_key] then
  255. rspamd_logger.infox(rspamd_config, 'reuse url for complex map definition %s: %s',
  256. cache_key:sub(1, 8), description)
  257. return maps_cache[cache_key]
  258. end
  259. if opt[1] then
  260. -- Adjust each element if needed
  261. local adjusted
  262. for i, source in ipairs(opt) do
  263. local nsrc, ntype = maybe_adjust_type(source, mtype)
  264. if mtype ~= ntype then
  265. if not adjusted then
  266. mtype = ntype
  267. end
  268. adjusted = true
  269. end
  270. opt[i] = nsrc
  271. end
  272. if mtype == 'radix' then
  273. if string.find(opt[1], '^%d') then
  274. local map = rspamd_config:radix_from_ucl(opt)
  275. if map then
  276. ret.__data = map
  277. setmetatable(ret, ret_mt)
  278. maps_cache[cache_key] = ret
  279. maybe_register_selector()
  280. return ret
  281. end
  282. else
  283. -- Plain table
  284. local map = rspamd_config:add_map {
  285. type = mtype,
  286. description = description,
  287. url = opt,
  288. }
  289. if map then
  290. ret.__data = map
  291. setmetatable(ret, ret_mt)
  292. maps_cache[cache_key] = ret
  293. maybe_register_selector()
  294. return ret
  295. end
  296. end
  297. elseif mtype == 'regexp' or mtype == 'glob' then
  298. if string.find(opt[1], '^/%a') or string.find(opt[1], '^http') then
  299. -- Plain table
  300. local map = rspamd_config:add_map {
  301. type = mtype,
  302. description = description,
  303. url = opt,
  304. }
  305. if map then
  306. ret.__data = map
  307. setmetatable(ret, ret_mt)
  308. maps_cache[cache_key] = ret
  309. maybe_register_selector()
  310. return ret
  311. end
  312. else
  313. local map = rspamd_config:add_map {
  314. type = mtype,
  315. description = description,
  316. url = {
  317. url = 'static',
  318. data = opt,
  319. }
  320. }
  321. if map then
  322. ret.__data = map
  323. setmetatable(ret, ret_mt)
  324. maps_cache[cache_key] = ret
  325. maybe_register_selector()
  326. return ret
  327. end
  328. end
  329. else
  330. if string.find(opt[1], '^/%a') or string.find(opt[1], '^http') then
  331. -- Plain table
  332. local map = rspamd_config:add_map {
  333. type = mtype,
  334. description = description,
  335. url = opt,
  336. }
  337. if map then
  338. ret.__data = map
  339. setmetatable(ret, ret_mt)
  340. maps_cache[cache_key] = ret
  341. maybe_register_selector()
  342. return ret
  343. end
  344. else
  345. local data = {}
  346. local nelts = 0
  347. -- Plain array of keys, count merely numeric elts
  348. for _, elt in ipairs(opt) do
  349. if type(elt) == 'string' then
  350. -- Numeric table
  351. if mtype == 'hash' then
  352. -- Treat as KV pair
  353. local pieces = lua_util.str_split(elt, ' ')
  354. if #pieces > 1 then
  355. local key = table.remove(pieces, 1)
  356. data[key] = table.concat(pieces, ' ')
  357. else
  358. data[elt] = true
  359. end
  360. else
  361. data[elt] = true
  362. end
  363. nelts = nelts + 1
  364. end
  365. end
  366. if nelts > 0 then
  367. -- Plain Lua table that is used as a map
  368. ret.__data = data
  369. ret.get_key = function(t, k)
  370. if k ~= '__data' then
  371. return t.__data[k]
  372. end
  373. return nil
  374. end
  375. ret.foreach = function(_, func)
  376. for k, v in pairs(ret.__data) do
  377. if not func(k, v) then
  378. return false
  379. end
  380. end
  381. return true
  382. end
  383. ret.on_load = function(_, cb)
  384. rspamd_config:add_on_load(function(_, _, _)
  385. cb()
  386. end)
  387. end
  388. maps_cache[cache_key] = ret
  389. maybe_register_selector()
  390. return ret
  391. else
  392. -- Empty map, huh?
  393. rspamd_logger.errx(rspamd_config, 'invalid map element: %s',
  394. opt)
  395. end
  396. end
  397. end
  398. else
  399. if opt.external then
  400. -- External map definition, missing fields are handled by schema
  401. local parse_res, parse_err = external_map_schema(opt)
  402. if parse_res then
  403. ret.__upstreams = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), opt.backend)
  404. if ret.__upstreams then
  405. ret.__data = opt
  406. ret.__external = true
  407. setmetatable(ret, ret_mt)
  408. maybe_register_selector()
  409. return ret
  410. else
  411. rspamd_logger.errx(rspamd_config, 'cannot parse external map upstreams: %s',
  412. opt.backend)
  413. end
  414. else
  415. rspamd_logger.errx(rspamd_config, 'cannot parse external map: %s',
  416. parse_err)
  417. end
  418. else
  419. -- Adjust lua specific augmentations in a trivial case
  420. if type(opt.url) == 'string' then
  421. local nsrc, ntype = maybe_adjust_type(opt.url, mtype)
  422. if nsrc and ntype then
  423. opt.url = nsrc
  424. mtype = ntype
  425. end
  426. end
  427. -- We have some non-trivial object so let C code to deal with it somehow...
  428. local map = rspamd_config:add_map {
  429. type = mtype,
  430. description = description,
  431. url = opt,
  432. }
  433. if map then
  434. ret.__data = map
  435. setmetatable(ret, ret_mt)
  436. maps_cache[cache_key] = ret
  437. maybe_register_selector()
  438. return ret
  439. end
  440. end
  441. end -- opt[1]
  442. end
  443. return nil
  444. end
  445. --[[[
  446. -- @function lua_maps.map_add(mname, optname, mtype, description)
  447. -- Creates a map from configuration elements (static data or URL)
  448. -- Returns true if map was added or nil
  449. -- @param {string} mname config section to use
  450. -- @param {string} optname option name to use
  451. -- @param {string} mtype type of map ('set', 'hash', 'radix', 'regexp', 'glob')
  452. -- @param {string} description human-readable description of map
  453. -- @param {function} callback optional callback that will be called on map match (required for external maps)
  454. -- @return {bool} true on success, or `nil`
  455. --]]
  456. local function rspamd_map_add(mname, optname, mtype, description, callback)
  457. local opt = rspamd_config:get_module_opt(mname, optname)
  458. return rspamd_map_add_from_ucl(opt, mtype, description, callback)
  459. end
  460. exports.rspamd_map_add = rspamd_map_add
  461. exports.map_add = rspamd_map_add
  462. exports.rspamd_map_add_from_ucl = rspamd_map_add_from_ucl
  463. exports.map_add_from_ucl = rspamd_map_add_from_ucl
  464. -- Check `what` for being lua_map name, otherwise just compares key with what
  465. local function rspamd_maybe_check_map(key, what)
  466. local fun = require "fun"
  467. if type(what) == "table" then
  468. return fun.any(function(elt)
  469. return rspamd_maybe_check_map(key, elt)
  470. end, what)
  471. end
  472. if type(rspamd_maps) == "table" then
  473. local mn
  474. if starts(key, "map:") then
  475. mn = string.sub(key, 5)
  476. elseif starts(key, "map://") then
  477. mn = string.sub(key, 7)
  478. end
  479. if mn and rspamd_maps[mn] then
  480. return rspamd_maps[mn]:get_key(what)
  481. end
  482. end
  483. return what:lower() == key
  484. end
  485. exports.rspamd_maybe_check_map = rspamd_maybe_check_map
  486. --[[[
  487. -- @function lua_maps.fill_config_maps(mname, options, defs)
  488. -- Fill maps that could be defined in defs, from the config in the options
  489. -- Defs is a table indexed by a map's parameter name and defining it's config,
  490. -- @example
  491. -- defs = {
  492. -- my_map = {
  493. -- type = 'map',
  494. -- description = 'my cool map',
  495. -- optional = true,
  496. -- }
  497. -- }
  498. -- -- Then this function will look for opts.my_map parameter and try to replace it's with
  499. -- -- a map with the specific type, description but not failing if it was empty.
  500. -- -- It will also set options.my_map_orig to the original value defined in the map.
  501. --]]
  502. exports.fill_config_maps = function(mname, opts, map_defs)
  503. assert(type(opts) == 'table')
  504. assert(type(map_defs) == 'table')
  505. for k, v in pairs(map_defs) do
  506. if opts[k] then
  507. local map = rspamd_map_add_from_ucl(opts[k], v.type or 'map', v.description)
  508. if not map then
  509. rspamd_logger.errx(rspamd_config, 'map add error %s for module %s', k, mname)
  510. return false
  511. end
  512. opts[k .. '_orig'] = opts[k]
  513. opts[k] = map
  514. elseif not v.optional then
  515. rspamd_logger.errx(rspamd_config, 'cannot find non optional map %s for module %s', k, mname)
  516. return false
  517. end
  518. end
  519. return true
  520. end
  521. local direct_map_schema = ts.shape { -- complex object
  522. name = ts.string:is_optional(),
  523. description = ts.string:is_optional(),
  524. selector_alias = ts.string:is_optional(), -- an optional alias for the selectos framework
  525. timeout = ts.number,
  526. data = ts.array_of(ts.string):is_optional(),
  527. -- Tableshape has no options support for something like key1 or key2?
  528. upstreams = ts.one_of {
  529. ts.string,
  530. ts.array_of(ts.string),
  531. } :is_optional(),
  532. url = ts.one_of {
  533. ts.string,
  534. ts.array_of(ts.string),
  535. } :is_optional(),
  536. }
  537. exports.map_schema = ts.one_of {
  538. ts.string, -- 'http://some_map'
  539. ts.array_of(ts.string), -- ['foo', 'bar']
  540. ts.one_of { direct_map_schema, external_map_schema }
  541. }
  542. return exports