You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_util.lua 36KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381
  1. --[[
  2. Copyright (c) 2017, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[[
  14. -- @module lua_util
  15. -- This module contains utility functions for working with Lua and/or Rspamd
  16. --]]
  17. local exports = {}
  18. local lpeg = require 'lpeg'
  19. local rspamd_util = require "rspamd_util"
  20. local fun = require "fun"
  21. local lupa = require "lupa"
  22. local split_grammar = {}
  23. local spaces_split_grammar
  24. local space = lpeg.S' \t\n\v\f\r'
  25. local nospace = 1 - space
  26. local ptrim = space^0 * lpeg.C((space^0 * nospace^1)^0)
  27. local match = lpeg.match
  28. lupa.configure('{%', '%}', '{=', '=}', '{#', '#}', {
  29. keep_trailing_newline = true,
  30. autoescape = false,
  31. })
  32. lupa.filters.pbkdf = function(s)
  33. local cr = require "rspamd_cryptobox"
  34. return cr.pbkdf(s)
  35. end
  36. local function rspamd_str_split(s, sep)
  37. local gr
  38. if not sep then
  39. if not spaces_split_grammar then
  40. local _sep = space
  41. local elem = lpeg.C((1 - _sep)^0)
  42. local p = lpeg.Ct(elem * (_sep * elem)^0)
  43. spaces_split_grammar = p
  44. end
  45. gr = spaces_split_grammar
  46. else
  47. gr = split_grammar[sep]
  48. if not gr then
  49. local _sep
  50. if type(sep) == 'string' then
  51. _sep = lpeg.S(sep) -- Assume set
  52. else
  53. _sep = sep -- Assume lpeg object
  54. end
  55. local elem = lpeg.C((1 - _sep)^0)
  56. local p = lpeg.Ct(elem * (_sep * elem)^0)
  57. gr = p
  58. split_grammar[sep] = gr
  59. end
  60. end
  61. return gr:match(s)
  62. end
  63. --[[[
  64. -- @function lua_util.str_split(text, deliminator)
  65. -- Splits text into a numeric table by deliminator
  66. -- @param {string} text deliminated text
  67. -- @param {string} deliminator the deliminator
  68. -- @return {table} numeric table containing string parts
  69. --]]
  70. exports.rspamd_str_split = rspamd_str_split
  71. exports.str_split = rspamd_str_split
  72. local function rspamd_str_trim(s)
  73. return match(ptrim, s)
  74. end
  75. exports.rspamd_str_trim = rspamd_str_trim
  76. --[[[
  77. -- @function lua_util.str_trim(text)
  78. -- Returns a string with no trailing and leading spaces
  79. -- @param {string} text input text
  80. -- @return {string} string with no trailing and leading spaces
  81. --]]
  82. exports.str_trim = rspamd_str_trim
  83. --[[[
  84. -- @function lua_util.round(number, decimalPlaces)
  85. -- Round number to fixed number of decimal points
  86. -- @param {number} number number to round
  87. -- @param {number} decimalPlaces number of decimal points
  88. -- @return {number} rounded number
  89. --]]
  90. -- Robert Jay Gould http://lua-users.org/wiki/SimpleRound
  91. exports.round = function(num, numDecimalPlaces)
  92. local mult = 10^(numDecimalPlaces or 0)
  93. return math.floor(num * mult) / mult
  94. end
  95. --[[[
  96. -- @function lua_util.template(text, replacements)
  97. -- Replaces values in a text template
  98. -- Variable names can contain letters, numbers and underscores, are prefixed with `$` and may or not use curly braces.
  99. -- @param {string} text text containing variables
  100. -- @param {table} replacements key/value pairs for replacements
  101. -- @return {string} string containing replaced values
  102. -- @example
  103. -- local goop = lua_util.template("HELLO $FOO ${BAR}!", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
  104. -- -- goop contains "HELLO LUA WORLD!"
  105. --]]
  106. exports.template = function(tmpl, keys)
  107. local var_lit = lpeg.P { lpeg.R("az") + lpeg.R("AZ") + lpeg.R("09") + "_" }
  108. local var = lpeg.P { (lpeg.P("$") / "") * ((var_lit^1) / keys) }
  109. local var_braced = lpeg.P { (lpeg.P("${") / "") * ((var_lit^1) / keys) * (lpeg.P("}") / "") }
  110. local template_grammar = lpeg.Cs((var + var_braced + 1)^0)
  111. return lpeg.match(template_grammar, tmpl)
  112. end
  113. local function enrich_template_with_globals(env)
  114. local newenv = exports.shallowcopy(env)
  115. newenv.paths = rspamd_paths
  116. newenv.env = rspamd_env
  117. return newenv
  118. end
  119. --[[[
  120. -- @function lua_util.jinja_template(text, env[, skip_global_env])
  121. -- Replaces values in a text template according to jinja2 syntax
  122. -- @param {string} text text containing variables
  123. -- @param {table} replacements key/value pairs for replacements
  124. -- @param {boolean} skip_global_env don't export Rspamd superglobals
  125. -- @return {string} string containing replaced values
  126. -- @example
  127. -- lua_util.jinja_template("HELLO {{FOO}} {{BAR}}!", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
  128. -- "HELLO LUA WORLD!"
  129. --]]
  130. exports.jinja_template = function(text, env, skip_global_env)
  131. if not skip_global_env then
  132. env = enrich_template_with_globals(env)
  133. end
  134. return lupa.expand(text, env)
  135. end
  136. --[[[
  137. -- @function lua_util.jinja_file(filename, env[, skip_global_env])
  138. -- Replaces values in a text template according to jinja2 syntax
  139. -- @param {string} filename name of file to expand
  140. -- @param {table} replacements key/value pairs for replacements
  141. -- @param {boolean} skip_global_env don't export Rspamd superglobals
  142. -- @return {string} string containing replaced values
  143. -- @example
  144. -- lua_util.jinja_template("HELLO {{FOO}} {{BAR}}!", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
  145. -- "HELLO LUA WORLD!"
  146. --]]
  147. exports.jinja_template_file = function(filename, env, skip_global_env)
  148. if not skip_global_env then
  149. env = enrich_template_with_globals(env)
  150. end
  151. return lupa.expand_file(filename, env)
  152. end
  153. exports.remove_email_aliases = function(email_addr)
  154. local function check_gmail_user(addr)
  155. -- Remove all points
  156. local no_dots_user = string.gsub(addr.user, '%.', '')
  157. local cap, pluses = string.match(no_dots_user, '^([^%+][^%+]*)(%+.*)$')
  158. if cap then
  159. return cap, rspamd_str_split(pluses, '+'), nil
  160. elseif no_dots_user ~= addr.user then
  161. return no_dots_user,{},nil
  162. end
  163. return nil
  164. end
  165. local function check_address(addr)
  166. if addr.user then
  167. local cap, pluses = string.match(addr.user, '^([^%+][^%+]*)(%+.*)$')
  168. if cap then
  169. return cap, rspamd_str_split(pluses, '+'), nil
  170. end
  171. end
  172. return nil
  173. end
  174. local function set_addr(addr, new_user, new_domain)
  175. if new_user then
  176. addr.user = new_user
  177. end
  178. if new_domain then
  179. addr.domain = new_domain
  180. end
  181. if addr.domain then
  182. addr.addr = string.format('%s@%s', addr.user, addr.domain)
  183. else
  184. addr.addr = string.format('%s@', addr.user)
  185. end
  186. if addr.name and #addr.name > 0 then
  187. addr.raw = string.format('"%s" <%s>', addr.name, addr.addr)
  188. else
  189. addr.raw = string.format('<%s>', addr.addr)
  190. end
  191. end
  192. local function check_gmail(addr)
  193. local nu, tags, nd = check_gmail_user(addr)
  194. if nu then
  195. return nu, tags, nd
  196. end
  197. return nil
  198. end
  199. local function check_googlemail(addr)
  200. local nd = 'gmail.com'
  201. local nu, tags = check_gmail_user(addr)
  202. if nu then
  203. return nu, tags, nd
  204. end
  205. return nil, nil, nd
  206. end
  207. local specific_domains = {
  208. ['gmail.com'] = check_gmail,
  209. ['googlemail.com'] = check_googlemail,
  210. }
  211. if email_addr then
  212. if email_addr.domain and specific_domains[email_addr.domain] then
  213. local nu, tags, nd = specific_domains[email_addr.domain](email_addr)
  214. if nu or nd then
  215. set_addr(email_addr, nu, nd)
  216. return nu, tags
  217. end
  218. else
  219. local nu, tags, nd = check_address(email_addr)
  220. if nu or nd then
  221. set_addr(email_addr, nu, nd)
  222. return nu, tags
  223. end
  224. end
  225. return nil
  226. end
  227. end
  228. exports.is_rspamc_or_controller = function(task)
  229. local ua = task:get_request_header('User-Agent') or ''
  230. local pwd = task:get_request_header('Password')
  231. local is_rspamc = false
  232. if tostring(ua) == 'rspamc' or pwd then is_rspamc = true end
  233. return is_rspamc
  234. end
  235. --[[[
  236. -- @function lua_util.unpack(table)
  237. -- Converts numeric table to varargs
  238. -- This is `unpack` on Lua 5.1/5.2/LuaJIT and `table.unpack` on Lua 5.3
  239. -- @param {table} table numerically indexed table to unpack
  240. -- @return {varargs} unpacked table elements
  241. --]]
  242. local unpack_function = table.unpack or unpack
  243. exports.unpack = function(t)
  244. return unpack_function(t)
  245. end
  246. --[[[
  247. -- @function lua_util.flatten(table)
  248. -- Flatten underlying tables in a single table
  249. -- @param {table} table table of tables
  250. -- @return {table} flattened table
  251. --]]
  252. exports.flatten = function(t)
  253. local res = {}
  254. for _,e in fun.iter(t) do
  255. for _,v in fun.iter(e) do
  256. res[#res + 1] = v
  257. end
  258. end
  259. return res
  260. end
  261. --[[[
  262. -- @function lua_util.spairs(table)
  263. -- Like `pairs` but keys are sorted lexicographically
  264. -- @param {table} table table containing key/value pairs
  265. -- @return {function} generator function returning key/value pairs
  266. --]]
  267. -- Sorted iteration:
  268. -- for k,v in spairs(t) do ... end
  269. --
  270. -- or with custom comparison:
  271. -- for k, v in spairs(t, function(t, a, b) return t[a] < t[b] end)
  272. --
  273. -- optional limit is also available (e.g. return top X elements)
  274. local function spairs(t, order, lim)
  275. -- collect the keys
  276. local keys = {}
  277. for k in pairs(t) do keys[#keys+1] = k end
  278. -- if order function given, sort by it by passing the table and keys a, b,
  279. -- otherwise just sort the keys
  280. if order then
  281. table.sort(keys, function(a,b) return order(t, a, b) end)
  282. else
  283. table.sort(keys)
  284. end
  285. -- return the iterator function
  286. local i = 0
  287. return function()
  288. i = i + 1
  289. if not lim or i <= lim then
  290. if keys[i] then
  291. return keys[i], t[keys[i]]
  292. end
  293. end
  294. end
  295. end
  296. exports.spairs = spairs
  297. --[[[
  298. -- @function lua_util.disable_module(modname, how)
  299. -- Disables a plugin
  300. -- @param {string} modname name of plugin to disable
  301. -- @param {string} how 'redis' to disable redis, 'config' to disable startup
  302. --]]
  303. local function disable_module(modname, how)
  304. if rspamd_plugins_state.enabled[modname] then
  305. rspamd_plugins_state.enabled[modname] = nil
  306. end
  307. if how == 'redis' then
  308. rspamd_plugins_state.disabled_redis[modname] = {}
  309. elseif how == 'config' then
  310. rspamd_plugins_state.disabled_unconfigured[modname] = {}
  311. elseif how == 'experimental' then
  312. rspamd_plugins_state.disabled_experimental[modname] = {}
  313. else
  314. rspamd_plugins_state.disabled_failed[modname] = {}
  315. end
  316. end
  317. exports.disable_module = disable_module
  318. --[[[
  319. -- @function lua_util.disable_module(modname)
  320. -- Checks experimental plugins state and disable if needed
  321. -- @param {string} modname name of plugin to check
  322. -- @return {boolean} true if plugin should be enabled, false otherwise
  323. --]]
  324. local function check_experimental(modname)
  325. if rspamd_config:experimental_enabled() then
  326. return true
  327. else
  328. disable_module(modname, 'experimental')
  329. end
  330. return false
  331. end
  332. exports.check_experimental = check_experimental
  333. --[[[
  334. -- @function lua_util.list_to_hash(list)
  335. -- Converts numerically-indexed table to table indexed by values
  336. -- @param {table} list numerically-indexed table or string, which is treated as a one-element list
  337. -- @return {table} table indexed by values
  338. -- @example
  339. -- local h = lua_util.list_to_hash({"a", "b"})
  340. -- -- h contains {a = true, b = true}
  341. --]]
  342. local function list_to_hash(list)
  343. if type(list) == 'table' then
  344. if list[1] then
  345. local h = {}
  346. for _, e in ipairs(list) do
  347. h[e] = true
  348. end
  349. return h
  350. else
  351. return list
  352. end
  353. elseif type(list) == 'string' then
  354. local h = {}
  355. h[list] = true
  356. return h
  357. end
  358. end
  359. exports.list_to_hash = list_to_hash
  360. --[[[
  361. -- @function lua_util.nkeys(table|gen, param, state)
  362. -- Returns number of keys in a table (i.e. from both the array and hash parts combined)
  363. -- @param {table} list numerically-indexed table or string, which is treated as a one-element list
  364. -- @return {number} number of keys
  365. -- @example
  366. -- print(lua_util.nkeys({})) -- 0
  367. -- print(lua_util.nkeys({ "a", nil, "b" })) -- 2
  368. -- print(lua_util.nkeys({ dog = 3, cat = 4, bird = nil })) -- 2
  369. -- print(lua_util.nkeys({ "a", dog = 3, cat = 4 })) -- 3
  370. --
  371. --]]
  372. local function nkeys(gen, param, state)
  373. local n = 0
  374. if not param then
  375. for _,_ in pairs(gen) do n = n + 1 end
  376. else
  377. for _,_ in fun.iter(gen, param, state) do n = n + 1 end
  378. end
  379. return n
  380. end
  381. exports.nkeys = nkeys
  382. --[[[
  383. -- @function lua_util.parse_time_interval(str)
  384. -- Parses human readable time interval
  385. -- Accepts 's' for seconds, 'm' for minutes, 'h' for hours, 'd' for days,
  386. -- 'w' for weeks, 'y' for years
  387. -- @param {string} str input string
  388. -- @return {number|nil} parsed interval as seconds (might be fractional)
  389. --]]
  390. local function parse_time_interval(str)
  391. local function parse_time_suffix(s)
  392. if s == 's' then
  393. return 1
  394. elseif s == 'm' then
  395. return 60
  396. elseif s == 'h' then
  397. return 3600
  398. elseif s == 'd' then
  399. return 86400
  400. elseif s == 'w' then
  401. return 86400 * 7
  402. elseif s == 'y' then
  403. return 365 * 86400;
  404. end
  405. end
  406. local digit = lpeg.R("09")
  407. local parser = {}
  408. parser.integer =
  409. (lpeg.S("+-") ^ -1) *
  410. (digit ^ 1)
  411. parser.fractional =
  412. (lpeg.P(".") ) *
  413. (digit ^ 1)
  414. parser.number =
  415. (parser.integer *
  416. (parser.fractional ^ -1)) +
  417. (lpeg.S("+-") * parser.fractional)
  418. parser.time = lpeg.Cf(lpeg.Cc(1) *
  419. (parser.number / tonumber) *
  420. ((lpeg.S("smhdwy") / parse_time_suffix) ^ -1),
  421. function (acc, val) return acc * val end)
  422. local t = lpeg.match(parser.time, str)
  423. return t
  424. end
  425. exports.parse_time_interval = parse_time_interval
  426. --[[[
  427. -- @function lua_util.dehumanize_number(str)
  428. -- Parses human readable number
  429. -- Accepts 'k' for thousands, 'm' for millions, 'g' for billions, 'b' suffix for 1024 multiplier,
  430. -- e.g. `10mb` equal to `10 * 1024 * 1024`
  431. -- @param {string} str input string
  432. -- @return {number|nil} parsed number
  433. --]]
  434. local function dehumanize_number(str)
  435. local function parse_suffix(s)
  436. if s == 'k' then
  437. return 1000
  438. elseif s == 'm' then
  439. return 1000000
  440. elseif s == 'g' then
  441. return 1e9
  442. elseif s == 'kb' then
  443. return 1024
  444. elseif s == 'mb' then
  445. return 1024 * 1024
  446. elseif s == 'gb' then
  447. return 1024 * 1024;
  448. end
  449. end
  450. local digit = lpeg.R("09")
  451. local parser = {}
  452. parser.integer =
  453. (lpeg.S("+-") ^ -1) *
  454. (digit ^ 1)
  455. parser.fractional =
  456. (lpeg.P(".") ) *
  457. (digit ^ 1)
  458. parser.number =
  459. (parser.integer *
  460. (parser.fractional ^ -1)) +
  461. (lpeg.S("+-") * parser.fractional)
  462. parser.humanized_number = lpeg.Cf(lpeg.Cc(1) *
  463. (parser.number / tonumber) *
  464. (((lpeg.S("kmg") * (lpeg.P("b") ^ -1)) / parse_suffix) ^ -1),
  465. function (acc, val) return acc * val end)
  466. local t = lpeg.match(parser.humanized_number, str)
  467. return t
  468. end
  469. exports.dehumanize_number = dehumanize_number
  470. --[[[
  471. -- @function lua_util.table_cmp(t1, t2)
  472. -- Compare two tables deeply
  473. --]]
  474. local function table_cmp(table1, table2)
  475. local avoid_loops = {}
  476. local function recurse(t1, t2)
  477. if type(t1) ~= type(t2) then return false end
  478. if type(t1) ~= "table" then return t1 == t2 end
  479. if avoid_loops[t1] then return avoid_loops[t1] == t2 end
  480. avoid_loops[t1] = t2
  481. -- Copy keys from t2
  482. local t2keys = {}
  483. local t2tablekeys = {}
  484. for k, _ in pairs(t2) do
  485. if type(k) == "table" then table.insert(t2tablekeys, k) end
  486. t2keys[k] = true
  487. end
  488. -- Let's iterate keys from t1
  489. for k1, v1 in pairs(t1) do
  490. local v2 = t2[k1]
  491. if type(k1) == "table" then
  492. -- if key is a table, we need to find an equivalent one.
  493. local ok = false
  494. for i, tk in ipairs(t2tablekeys) do
  495. if table_cmp(k1, tk) and recurse(v1, t2[tk]) then
  496. table.remove(t2tablekeys, i)
  497. t2keys[tk] = nil
  498. ok = true
  499. break
  500. end
  501. end
  502. if not ok then return false end
  503. else
  504. -- t1 has a key which t2 doesn't have, fail.
  505. if v2 == nil then return false end
  506. t2keys[k1] = nil
  507. if not recurse(v1, v2) then return false end
  508. end
  509. end
  510. -- if t2 has a key which t1 doesn't have, fail.
  511. if next(t2keys) then return false end
  512. return true
  513. end
  514. return recurse(table1, table2)
  515. end
  516. exports.table_cmp = table_cmp
  517. --[[[
  518. -- @function lua_util.table_cmp(task, name, value, stop_chars)
  519. -- Performs header folding
  520. --]]
  521. exports.fold_header = function(task, name, value, stop_chars)
  522. local how
  523. if task:has_flag("milter") then
  524. how = "lf"
  525. else
  526. how = task:get_newlines_type()
  527. end
  528. return rspamd_util.fold_header(name, value, how, stop_chars)
  529. end
  530. --[[[
  531. -- @function lua_util.override_defaults(defaults, override)
  532. -- Overrides values from defaults with override
  533. --]]
  534. local function override_defaults(def, override)
  535. -- Corner cases
  536. if not override or type(override) ~= 'table' then
  537. return def
  538. end
  539. if not def or type(def) ~= 'table' then
  540. return override
  541. end
  542. local res = {}
  543. for k,v in pairs(override) do
  544. if type(v) == 'table' then
  545. if def[k] and type(def[k]) == 'table' then
  546. -- Recursively override elements
  547. res[k] = override_defaults(def[k], v)
  548. else
  549. res[k] = v
  550. end
  551. else
  552. res[k] = v
  553. end
  554. end
  555. for k,v in pairs(def) do
  556. if type(res[k]) == 'nil' then
  557. res[k] = v
  558. end
  559. end
  560. return res
  561. end
  562. exports.override_defaults = override_defaults
  563. --[[[
  564. -- @function lua_util.filter_specific_urls(urls, params)
  565. -- params: {
  566. - - task - if needed to save in the cache
  567. - - limit <int> (default = 9999)
  568. - - esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain)
  569. works only if number of unique eSLD less than `limit`
  570. - - need_emails <bool> (default = false)
  571. - - filter <callback> (default = nil)
  572. - - prefix <string> cache prefix (default = nil)
  573. -- }
  574. -- Apply heuristic in extracting of urls from `urls` table, this function
  575. -- tries its best to extract specific number of urls from a task based on
  576. -- their characteristics
  577. --]]
  578. exports.filter_specific_urls = function (urls, params)
  579. local cache_key
  580. if params.task and not params.no_cache then
  581. if params.prefix then
  582. cache_key = params.prefix
  583. else
  584. cache_key = string.format('sp_urls_%d%s%s%s', params.limit,
  585. tostring(params.need_emails or false),
  586. tostring(params.need_images or false),
  587. tostring(params.need_content or false))
  588. end
  589. local cached = params.task:cache_get(cache_key)
  590. if cached then
  591. return cached
  592. end
  593. end
  594. if not urls then return {} end
  595. if params.filter then urls = fun.totable(fun.filter(params.filter, urls)) end
  596. -- Filter by tld:
  597. local tlds = {}
  598. local eslds = {}
  599. local ntlds, neslds = 0, 0
  600. local res = {}
  601. local nres = 0
  602. local function insert_url(str, u)
  603. if not res[str] then
  604. res[str] = u
  605. nres = nres + 1
  606. return true
  607. end
  608. return false
  609. end
  610. local function process_single_url(u, default_priority)
  611. local priority = default_priority or 1 -- Normal priority
  612. local flags = u:get_flags()
  613. if params.ignore_ip and flags.numeric then
  614. return
  615. end
  616. if flags.redirected then
  617. local redir = u:get_redirected() -- get the real url
  618. if params.ignore_redirected then
  619. -- Replace `u` with redir
  620. u = redir
  621. priority = 2
  622. else
  623. -- Process both redirected url and the original one
  624. process_single_url(redir, 2)
  625. end
  626. end
  627. if flags.image then
  628. if not params.need_images then
  629. -- Ignore url
  630. return
  631. else
  632. -- Penalise images in urls
  633. priority = 0
  634. end
  635. end
  636. local esld = u:get_tld()
  637. local str_hash = tostring(u)
  638. if esld then
  639. -- Special cases
  640. if (u:get_protocol() ~= 'mailto') and (not flags.html_displayed) then
  641. if flags.obscured then
  642. priority = 3
  643. else
  644. if (flags.has_user or flags.has_port) then
  645. priority = 2
  646. elseif (flags.subject or flags.phished) then
  647. priority = 2
  648. end
  649. end
  650. elseif flags.html_displayed then
  651. priority = 0
  652. end
  653. if not eslds[esld] then
  654. eslds[esld] = {{str_hash, u, priority}}
  655. neslds = neslds + 1
  656. else
  657. if #eslds[esld] < params.esld_limit then
  658. table.insert(eslds[esld], {str_hash, u, priority})
  659. end
  660. end
  661. -- eSLD - 1 part => tld
  662. local parts = rspamd_str_split(esld, '.')
  663. local tld = table.concat(fun.totable(fun.tail(parts)), '.')
  664. if not tlds[tld] then
  665. tlds[tld] = {{str_hash, u, priority}}
  666. ntlds = ntlds + 1
  667. else
  668. table.insert(tlds[tld], {str_hash, u, priority})
  669. end
  670. end
  671. end
  672. for _,u in ipairs(urls) do
  673. process_single_url(u)
  674. end
  675. local limit = params.limit
  676. limit = limit - nres
  677. if limit < 0 then limit = 0 end
  678. if limit == 0 then
  679. res = exports.values(res)
  680. if params.task and not params.no_cache then
  681. params.task:cache_set(cache_key, res)
  682. end
  683. return res
  684. end
  685. -- Sort eSLDs and tlds
  686. local function sort_stuff(tbl)
  687. -- Sort according to max priority
  688. table.sort(tbl, function(e1, e2)
  689. -- Sort by priority so max priority is at the end
  690. table.sort(e1, function(tr1, tr2)
  691. return tr1[3] < tr2[3]
  692. end)
  693. table.sort(e2, function(tr1, tr2)
  694. return tr1[3] < tr2[3]
  695. end)
  696. if e1[#e1][3] ~= e2[#e2][3] then
  697. -- Sort by priority so max priority is at the beginning
  698. return e1[#e1][3] > e2[#e2][3]
  699. else
  700. -- Prefer less urls to more urls per esld
  701. return #e1 < #e2
  702. end
  703. end)
  704. return tbl
  705. end
  706. eslds = sort_stuff(exports.values(eslds))
  707. neslds = #eslds
  708. if neslds <= limit then
  709. -- Number of eslds < limit
  710. repeat
  711. local item_found = false
  712. for _,lurls in ipairs(eslds) do
  713. if #lurls > 0 then
  714. local last = table.remove(lurls)
  715. insert_url(last[1], last[2])
  716. limit = limit - 1
  717. item_found = true
  718. end
  719. end
  720. until limit <= 0 or not item_found
  721. res = exports.values(res)
  722. if params.task and not params.no_cache then
  723. params.task:cache_set(cache_key, res)
  724. end
  725. return res
  726. end
  727. tlds = sort_stuff(exports.values(tlds))
  728. ntlds = #tlds
  729. -- Number of tlds < limit
  730. while limit > 0 do
  731. for _,lurls in ipairs(tlds) do
  732. if #lurls > 0 then
  733. local last = table.remove(lurls)
  734. insert_url(last[1], last[2])
  735. limit = limit - 1
  736. end
  737. if limit == 0 then break end
  738. end
  739. end
  740. res = exports.values(res)
  741. if params.task and not params.no_cache then
  742. params.task:cache_set(cache_key, res)
  743. end
  744. return res
  745. end
  --[[[
  -- @function lua_util.extract_specific_urls(params)
  -- params: {
  - - task
  - - limit <int> (default = 9999)
  - - esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain)
      works only if number of unique eSLD less than `limit`
  - - need_emails <bool> (default = false)
  - - need_images <bool> (default = false)
  - - need_content <bool> (default = false)
  - - filter <callback> (default = nil)
  - - prefix <string> cache prefix, used verbatim as the cache key (default = nil)
  - - ignore_ip <bool> (default = false)
  - - ignore_redirected <bool> (default = false)
  - - no_cache <bool> (default = false) skip the task cache lookup
  -- }
  -- Apply heuristic in extracting of urls from task, this function
  -- tries its best to extract specific number of urls from a task based on
  -- their characteristics.
  --
  -- The legacy positional form `(task, limit, need_emails, filter, prefix)` is
  -- still accepted. In the table form, unset options are filled with their
  -- defaults directly in the table supplied by the caller.
  --]]
  exports.extract_specific_urls = function(params_or_task, lim, need_emails, filter, prefix)
    -- Values used for options the caller left unset; `filter` and `prefix`
    -- have no default and simply stay nil
    local defaults = {
      limit = 9999,
      esld_limit = 9999,
      need_emails = false,
      need_images = false,
      need_content = false,
      ignore_ip = false,
      ignore_redirected = false,
      no_cache = false,
    }

    local params = params_or_task
    if type(params_or_task) ~= 'table' or lim ~= nil then
      -- Deprecated call: positional arguments are packed into a params table
      params = {
        task = params_or_task,
        limit = lim,
        need_emails = need_emails,
        filter = filter,
        prefix = prefix
      }
    end

    for name, value in pairs(defaults) do
      if params[name] == nil then
        params[name] = value
      end
    end

    -- Selection flags handed to task:get_urls()
    local url_params = {
      emails = params.need_emails,
      images = params.need_images,
      content = params.need_content,
    }

    -- Shortcut for cached stuff
    if params.task and not params.no_cache then
      local cache_key = params.prefix
      if not cache_key then
        cache_key = string.format('sp_urls_%d%s%s%s', params.limit,
            tostring(params.need_emails or false),
            tostring(params.need_images or false),
            tostring(params.need_content or false))
      end

      local hit = params.task:cache_get(cache_key)
      if hit then
        return hit
      end
    end

    -- No cache version
    local candidates = params.task:get_urls(url_params)
    return exports.filter_specific_urls(candidates, params)
  end
  --[[[
  -- @function lua_util.deepcopy(table)
  -- params: {
  - - table
  -- }
  -- Performs deep copy of the table, including table keys and metatables.
  -- Non-table values are returned as is. Every distinct source table is copied
  -- exactly once per call, so self-referencing (cyclic) structures and tables
  -- reachable through several paths are reproduced with the same shape instead
  -- of recursing forever.
  --]]
  local function deepcopy(orig)
    -- Source table -> its copy, for every table visited during this call
    local copies = {}

    local function clone(value)
      if type(value) ~= 'table' then
        -- number, string, boolean, function, userdata, nil
        return value
      end

      local known = copies[value]
      if known ~= nil then
        return known
      end

      local copy = {}
      -- Register before descending so that cycles resolve to this copy
      copies[value] = copy

      for k, v in next, value, nil do
        copy[clone(k)] = clone(v)
      end

      -- Attached last so that a copied __newindex cannot intercept the fill above
      return setmetatable(copy, clone(getmetatable(value)))
    end

    return clone(orig)
  end
  840. exports.deepcopy = deepcopy
  --[[[
  -- @function lua_util.shallowcopy(tbl)
  -- Performs shallow (and fast) copy of a table or another Lua type.
  -- For a table a new table with the same key/value pairs is returned (nested
  -- tables are shared with the source, the metatable is not copied); any other
  -- value is returned unchanged.
  --]]
  exports.shallowcopy = function(orig)
    if type(orig) ~= 'table' then
      return orig
    end

    local duplicate = {}
    for key, value in pairs(orig) do
      duplicate[key] = value
    end

    return duplicate
  end
  -- Debugging support
  -- When true, debugm() logs for every module regardless of `debug_modules`
  local unconditional_debug = false
  -- Set of names with debug logging enabled (name -> true); holds both module
  -- names and aliases
  local debug_modules = {}
  -- Map of alias -> module name, filled by add_debug_alias()
  local debug_aliases = {}
  -- Level value passed to logger.logx() by debugm()
  local log_level = 384 -- debug + forced (1 << 7 | 1 << 8)
  --[[[
  -- @function lua_util.init_debug_logging(config)
  -- Initialises Lua debug logging from the `logging` section of the config:
  -- * `level = 'debug'` turns on debug output for every module
  -- * otherwise each name listed in `debug_modules` is enabled, and so is every
  --   alias previously registered with `add_debug_alias` for such a module
  -- @param {rspamd_config} config configuration object providing `get_all_opt`
  --]]
  exports.init_debug_logging = function(config)
    local logger = require "rspamd_logger"
    -- Fill debug modules from the config
    local logging = config:get_all_opt('logging')
    if not logging then
      return
    end

    if logging.level == 'debug' then
      unconditional_debug = true
    end

    if unconditional_debug then
      -- Everything is logged anyway, per-module switches are irrelevant
      return
    end

    if logging.debug_modules then
      for _,m in ipairs(logging.debug_modules) do
        debug_modules[m] = true
        logger.infox(config, 'enable debug for Lua module %s', m)
      end
    end

    -- `debug_aliases` is keyed by alias name, so the length operator is always
    -- 0 for it and cannot be used as an emptiness check; just walk the map
    for alias,mod in pairs(debug_aliases) do
      if debug_modules[mod] then
        debug_modules[alias] = true
        logger.infox(config, 'enable debug for Lua module %s (%s aliased)',
            alias, mod)
      end
    end
  end
  -- Turns debug logging on for all modules: sets the flag that debugm() checks
  -- before consulting the per-module table
  exports.enable_debug_logging = function()
    unconditional_debug = true
  end
  -- Clears the "log everything" flag; modules individually enabled in
  -- `debug_modules` are not affected
  exports.disable_debug_logging = function()
    unconditional_debug = false
  end
  --[[[
  -- @function lua_util.debugm(module, [log_object], format, ...)
  -- Performs fast debug log for a specific module.
  -- Nothing is done (the logger is not even looked up) unless debugging is
  -- enabled globally or for `module`.
  -- If the second argument is a string it is taken as the format and an empty
  -- string is passed to the logger in place of the log object.
  --]]
  exports.debugm = function(mod, obj_or_fmt, fmt_or_something, ...)
    -- Cheap check first: this is the common path when debugging is off
    if not (unconditional_debug or debug_modules[mod]) then
      return
    end

    local logger = require "rspamd_logger"

    -- NOTE(review): the literal 2 is presumably the stack level used to
    -- attribute the message to the caller of debugm - confirm against
    -- rspamd_logger.logx. The calls are kept as plain statements directly in
    -- this function so that the call depth is not altered.
    if type(obj_or_fmt) == 'string' then
      logger.logx(log_level, mod, '', 2, obj_or_fmt, fmt_or_something, ...)
    else
      logger.logx(log_level, mod, obj_or_fmt, 2, fmt_or_something, ...)
    end
  end
  --[[[
  -- @function lua_util.add_debug_alias(mod, alias)
  -- Add debugging alias so logging to `alias` will be treated as logging to `mod`.
  -- The alias is always recorded; if debug is already enabled for `mod` the
  -- alias is enabled immediately as well.
  --]]
  exports.add_debug_alias = function(mod, alias)
    local logger = require "rspamd_logger"

    debug_aliases[alias] = mod

    if not debug_modules[mod] then
      return
    end

    debug_modules[alias] = true
    logger.infox(rspamd_config, 'enable debug for Lua module %s (%s aliased)',
        alias, mod)
  end
  ---[[[
  -- @function lua_util.get_task_verdict(task)
  -- Returns verdict for a task + score if certain, must be called from idempotent filters only.
  -- This is a thin wrapper: the result is whatever `lua_verdict.get_default_verdict(task)`
  -- returns. The verdict strings, as described for that function:
  -- * `spam`: if a message is over the reject threshold and has more than one positive rule
  -- * `junk`: if a message has a score between the [add_header/rewrite subject] and reject thresholds and has more than two positive rules
  -- * `passthrough`: if a message has been passed through some short-circuit rule
  -- * `ham`: if a message has an overall score below junk level **and** more than three negative rules, or a negative total score
  -- * `uncertain`: all other cases
  --]]
  exports.get_task_verdict = function(task)
    -- The module is required lazily, at call time
    local lua_verdict = require "lua_verdict"

    return lua_verdict.get_default_verdict(task)
  end
  ---[[[
  -- @function lua_util.maybe_obfuscate_string(subject, settings, prefix)
  -- Obfuscate string if enabled in settings. Also checks utf8 validity - if
  -- string is not valid utf8 then '???' is returned. Empty (or nil) string is returned as is.
  -- Supported settings:
  -- * <prefix>_privacy = false - subject privacy is off
  -- * <prefix>_privacy_alg = 'blake2' - default hash-algorithm to obfuscate subject
  -- * <prefix>_privacy_prefix = 'obf' - prefix to show it's obfuscated, joined as `<prefix>:<hash>`; nothing is prepended when unset or empty
  -- * <prefix>_privacy_length = 16 - cut the length of the hash; if 0 or false full hash is returned
  -- @return obfuscated or validated subject
  --]]
  exports.maybe_obfuscate_string = function(subject, settings, prefix)
    local hash = require 'rspamd_cryptobox_hash'

    if not subject or subject == '' then
      return subject
    end

    if not rspamd_util.is_valid_utf8(subject) then
      return '???'
    end

    if not settings[prefix .. '_privacy'] then
      -- Privacy is off: valid input goes through untouched
      return subject
    end

    local algorithm = settings[prefix .. '_privacy_alg'] or 'blake2'
    local hasher = hash.create_specific(algorithm, subject)
    local max_len = settings[prefix .. '_privacy_length']

    local obfuscated
    if max_len and max_len > 0 then
      obfuscated = hasher:hex():sub(1, max_len)
    else
      obfuscated = hasher:hex()
    end

    local marker = settings[prefix .. '_privacy_prefix']
    if marker and #marker > 0 then
      obfuscated = marker .. ':' .. obfuscated
    end

    return obfuscated
  end
  ---[[[
  -- @function lua_util.callback_from_string(str)
  -- Converts a string like `return function(...) end` to lua function and return true and this function
  -- or returns false + error message.
  -- Accepted forms (after trimming):
  -- * `return function ...` - evaluated as is
  -- * `function(...) ... end` - `return ` is prepended
  -- * anything else - treated as a function body and wrapped into `return function(...) ... end`
  -- The error message is the compiler message for a syntax error, the runtime
  -- error if evaluating the chunk fails, or a description of the result type
  -- if the chunk does not evaluate to a function.
  -- @return status code and function object or an error message
  --]]]
  exports.callback_from_string = function(s)
    -- Lua 5.1/LuaJIT provide loadstring, newer versions only load
    local loadstring = loadstring or load

    if not s or #s == 0 then
      return false,'invalid or empty string'
    end

    s = exports.rspamd_str_trim(s)
    local inp

    if s:match('^return%s*function') then
      -- 'return function', can be evaluated directly
      inp = s
    elseif s:match('^function%s*%(') then
      inp = 'return ' .. s
    else
      -- Just a plain sequence
      inp = 'return function(...)\n' .. s .. '; end'
    end

    -- Compile separately from running: on a syntax error the loader returns
    -- nil plus a message, and feeding that straight into pcall would replace
    -- the useful message with "attempt to call a nil value"
    local chunk, compile_err = loadstring(inp)
    if not chunk then
      return false, compile_err
    end

    local ok, res_or_err = pcall(chunk)
    if not ok then
      return false, res_or_err
    end

    if type(res_or_err) ~= 'function' then
      return false, string.format('expected a function, got %s', type(res_or_err))
    end

    return true, res_or_err
  end
  ---[[[
  -- @function lua_util.keys(t)
  -- Returns all keys from a specific table.
  -- When only one argument is given it is traversed with `pairs`; when a second
  -- argument is present the arguments are handed to `fun.iter` as an iterator
  -- triplet and the first value of every step is collected.
  -- @param {table} t input table (or iterator triplet)
  -- @return array of keys
  --]]]
  exports.keys = function(gen, param, state)
    local collected = {}
    local n = 0

    if not param then
      -- Plain table
      for k in pairs(gen) do
        n = n + 1
        collected[n] = k
      end

      return collected
    end

    -- Iterator triplet
    for k in fun.iter(gen, param, state) do
      n = n + 1
      collected[n] = k
    end

    return collected
  end
  ---[[[
  -- @function lua_util.values(t)
  -- Returns all values from a specific table.
  -- When only one argument is given it is traversed with `pairs`; when a second
  -- argument is present the arguments are handed to `fun.iter` as an iterator
  -- triplet and the second value of every step is collected.
  -- @param {table} t input table (or iterator triplet)
  -- @return array of values
  --]]]
  exports.values = function(gen, param, state)
    local collected = {}
    local n = 0

    if not param then
      -- Plain table
      for _, v in pairs(gen) do
        n = n + 1
        collected[n] = v
      end

      return collected
    end

    -- Iterator triplet
    for _, v in fun.iter(gen, param, state) do
      n = n + 1
      collected[n] = v
    end

    return collected
  end
  ---[[[
  -- @function lua_util.distance_sorted(t1, t2)
  -- Returns distance between two sorted tables t1 and t2.
  -- Both arrays are walked with one cursor each for at most max(#t1, #t2)
  -- steps: equal elements advance both cursors, otherwise the cursor standing
  -- on the smaller element advances and the distance grows by one. When a
  -- cursor runs past its array, a correction derived from the array lengths and
  -- the cursor positions is applied and the walk stops.
  -- Elements must be comparable with `==` and `<`.
  -- @param {table} t1 input table
  -- @param {table} t2 input table
  -- @return distance between `t1` and `t2`
  --]]]
  exports.distance_sorted = function(t1, t2)
    local len1, len2 = #t1, #t2
    local longest = math.max(len1, len2)
    local distance = 0
    local p1, p2 = 1, 1

    for _ = 1, longest do
      if p2 > len2 then
        -- t2 is exhausted
        distance = distance + longest - len2
        if p1 > p2 then
          distance = distance - (p1 - p2)
        end
        break
      end

      if p1 > len1 then
        -- t1 is exhausted
        distance = distance + longest - len1
        if p2 > p1 then
          distance = distance - (p2 - p1)
        end
        break
      end

      local left, right = t1[p1], t2[p2]

      if left == right then
        p1 = p1 + 1
        p2 = p2 + 1
      elseif left < right then
        p1 = p1 + 1
        distance = distance + 1
      else
        p2 = p2 + 1
        distance = distance + 1
      end
    end

    return distance
  end
  ---[[[
  -- @function lua_util.table_digest(t)
  -- Returns hash of all values if t[1] is set (array mode) or of keys and values otherwise (map mode).
  -- Array mode: every element is hashed, nested tables via their own digest and
  -- everything else via `tostring`.
  -- Map mode: every key is hashed via `tostring`; of the values only strings
  -- and nested tables contribute.
  -- NOTE(review): in map mode the input is fed in `pairs` order, which Lua does
  -- not specify, and non-string scalar values are skipped - confirm that
  -- callers do not depend on equal maps always producing equal digests.
  -- @param {table} t input array or map
  -- @return {string} base32 representation of the hash created by `rspamd_cryptobox_hash.create()`
  --]]]
  local function table_digest(t)
    local cr = require "rspamd_cryptobox_hash"
    local digest = cr.create()

    if not t[1] then
      -- Map mode
      for key, value in pairs(t) do
        digest:update(tostring(key))

        local vtype = type(value)
        if vtype == 'table' then
          digest:update(table_digest(value))
        elseif vtype == 'string' then
          digest:update(value)
        end
      end
    else
      -- Array mode
      for _, element in ipairs(t) do
        if type(element) ~= 'table' then
          digest:update(tostring(element))
        else
          digest:update(table_digest(element))
        end
      end
    end

    return digest:base32()
  end

  exports.table_digest = table_digest
  ---[[[
  -- @function lua_util.toboolean(v)
  -- Converts a string, a number or a boolean to boolean.
  -- * strings: '1', 'true', 'TRUE', 'True' give true; '0', 'false', 'FALSE', 'False' give false
  -- * numbers: 0 gives false, any other number gives true
  -- * booleans are returned as is
  -- Any other input yields `false` plus an error message; the function does not raise.
  -- @param {string|number|boolean} v
  -- @return {boolean} v converted to boolean, followed by an error message if the conversion failed
  --]]]
  exports.toboolean = function(v)
    local known_strings = {
      ['1'] = true,
      ['true'] = true,
      ['TRUE'] = true,
      ['True'] = true,
      ['0'] = false,
      ['false'] = false,
      ['FALSE'] = false,
      ['False'] = false,
    }

    local vtype = type(v)

    if vtype == 'string' then
      local converted = known_strings[v]
      if converted ~= nil then
        return converted
      end

      return false, string.format('cannot convert %q to boolean', v)
    elseif vtype == 'number' then
      return v ~= 0
    elseif vtype == 'boolean' then
      return v
    end

    -- %q accepts only strings on Lua 5.1/LuaJIT (and never tables), so using
    -- it here would raise instead of reporting the failure
    return false, string.format('cannot convert %s to boolean', tostring(v))
  end
  ---[[[
  -- @function lua_util.config_check_local_or_authed(config, modname[, def_local[, def_authed]])
  -- Reads check_local and check_authed from the config as this is used in many modules.
  -- The module's own section is consulted first; the `options` section is used
  -- only if the module section sets neither of the two flags. Only boolean
  -- values are taken into account.
  -- @param {rspamd_config} config `rspamd_config` global
  -- @param {string} modname module name
  -- @param {boolean} def_local default for check_local (false if omitted)
  -- @param {boolean} def_authed default for check_authed (false if omitted)
  -- @return {table} array of two elements: `{check_local, check_authed}`
  --]]]
  exports.config_check_local_or_authed = function(rspamd_config, modname, def_local, def_authed)
    local check_local = def_local or false
    local check_authed = def_authed or false

    -- Picks up the flags from one config section; tells whether any was found
    local function try_section(where)
      local opts = rspamd_config:get_all_opt(where)
      if type(opts) ~= 'table' then
        return false
      end

      local found = false
      local local_opt = opts['check_local']
      local authed_opt = opts['check_authed']

      if type(local_opt) == 'boolean' then
        check_local = local_opt
        found = true
      end
      if type(authed_opt) == 'boolean' then
        check_authed = authed_opt
        found = true
      end

      return found
    end

    if not try_section(modname) then
      try_section('options')
    end

    return {check_local, check_authed}
  end
  ---[[[
  -- @function lua_util.is_skip_local_or_authed(task, conf[, ip])
  -- Returns `true` if local or authenticated task should be skipped for this module:
  -- * authenticated check is off (`conf[2]` is falsy) and the task has a user, or
  -- * local check is off (`conf[1]` is falsy) and the ip is a local address
  -- @param {rspamd_task} task
  -- @param {table} conf table returned from `config_check_local_or_authed`; `{false, false}` is assumed if omitted
  -- @param {rspamd_ip} ip optional ip address (taken from `task:get_from_ip()` if omitted)
  -- @return {boolean} true if check should be skipped
  --]]]
  exports.is_skip_local_or_authed = function(task, conf, ip)
    ip = ip or task:get_from_ip()
    conf = conf or {false, false}

    local check_local, check_authed = conf[1], conf[2]

    if not check_authed and task:get_user() then
      return true
    end

    if not check_local and type(ip) == 'userdata' and ip:is_local() then
      return true
    end

    return false
  end
  1200. return exports