You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_util.lua 36KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422
  1. --[[
  2. Copyright (c) 2017, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[[
  14. -- @module lua_util
  15. -- This module contains utility functions for working with Lua and/or Rspamd
  16. --]]
  17. local exports = {}
  18. local lpeg = require 'lpeg'
  19. local rspamd_util = require "rspamd_util"
  20. local fun = require "fun"
  21. local lupa = require "lupa"
  -- Cache of compiled split grammars, keyed by the separator given to str_split
  local split_grammar = {}
  -- Grammar for splitting on whitespace; built on first use by str_split
  local spaces_split_grammar
  -- Matches a single whitespace character
  local space = lpeg.S' \t\n\v\f\r'
  -- Matches a single non-whitespace character
  local nospace = 1 - space
  -- Skips leading whitespace, then captures everything up to the last
  -- non-whitespace character (used by str_trim)
  local ptrim = space^0 * lpeg.C((space^0 * nospace^1)^0)
  local match = lpeg.match

  -- Use non-default delimiter pairs for the lupa template engine
  -- NOTE(review): presumably block / variable / comment delimiters, in this
  -- order -- confirm against the lupa documentation
  lupa.configure('{%', '%}', '{=', '=}', '{#', '#}', {
    keep_trailing_newline = true,
    autoescape = false,
  })
  -- Template filter `pbkdf`: hands the value to rspamd_cryptobox.pbkdf and
  -- returns whatever that call returns. The module is required lazily, on
  -- first use of the filter.
  lupa.filters.pbkdf = function(s)
    return require("rspamd_cryptobox").pbkdf(s)
  end
  -- Builds the grammar `elem (sep elem)*` that collects all elements
  -- (possibly empty strings) into a table
  local function compile_split_grammar(sep_patt)
    local piece = lpeg.C((1 - sep_patt)^0)
    return lpeg.Ct(piece * (sep_patt * piece)^0)
  end

  --[[[
  -- @function lua_util.str_split(text, delimiter)
  -- Splits text into a numeric table by delimiter
  -- @param {string} text delimited text
  -- @param {string|lpeg} delimiter set of delimiter characters (a string) or
  -- an lpeg pattern; whitespace is used when omitted
  -- @return {table} numeric table containing string parts
  --]]
  local function rspamd_str_split(s, sep)
    local grammar

    if sep then
      -- Grammars are compiled once per separator and cached
      grammar = split_grammar[sep]
      if not grammar then
        -- A string is treated as a set of characters, anything else is
        -- assumed to be an lpeg object already
        local sep_patt = type(sep) == 'string' and lpeg.S(sep) or sep
        grammar = compile_split_grammar(sep_patt)
        split_grammar[sep] = grammar
      end
    else
      if not spaces_split_grammar then
        spaces_split_grammar = compile_split_grammar(space)
      end
      grammar = spaces_split_grammar
    end

    return grammar:match(s)
  end

  exports.rspamd_str_split = rspamd_str_split
  exports.str_split = rspamd_str_split
  --[[[
  -- @function lua_util.str_trim(text)
  -- Returns a string with no trailing and leading spaces
  -- @param {string} text input text
  -- @return {string} string with no trailing and leading spaces
  --]]
  local function rspamd_str_trim(s)
    -- `ptrim` captures the text between the leading and trailing whitespace
    return ptrim:match(s)
  end

  exports.rspamd_str_trim = rspamd_str_trim
  exports.str_trim = rspamd_str_trim
  --[[[
  -- @function lua_util.str_startswith(text, prefix)
  -- Checks whether a string begins with the given prefix
  -- @param {string} text
  -- @param {string} prefix
  -- @return {boolean} true if text starts with the specified prefix, false otherwise
  --]]
  exports.str_startswith = function(s, prefix)
    local prefix_len = prefix:len()
    local head = s:sub(1, prefix_len)
    return head == prefix
  end
  --[[[
  -- @function lua_util.str_endswith(text, suffix)
  -- Checks whether a string ends with the given suffix
  -- @param {string} text
  -- @param {string} suffix
  -- @return {boolean} true if text ends with the specified suffix (an empty
  -- suffix always matches), false otherwise
  --]]
  exports.str_endswith = function(s, suffix)
    local suffix_len = suffix:len()
    -- `s:sub(-0)` is the whole string rather than an empty one, so the empty
    -- suffix has to be handled explicitly
    if suffix_len == 0 then
      return true
    end
    return s:sub(-suffix_len) == suffix
  end
  --[[[
  -- @function lua_util.round(number, decimalPlaces)
  -- Round number to fixed number of decimal points (halves are rounded up)
  -- @param {number} number number to round
  -- @param {number} decimalPlaces number of decimal points (default 0)
  -- @return {number} rounded number
  --]]
  -- Robert Jay Gould http://lua-users.org/wiki/SimpleRound
  exports.round = function(num, numDecimalPlaces)
    local mult = 10^(numDecimalPlaces or 0)
    -- Adding 0.5 before flooring rounds to nearest; a bare floor would
    -- always round down (e.g. 1.9 -> 1)
    return math.floor(num * mult + 0.5) / mult
  end
  --[[[
  -- @function lua_util.template(text, replacements)
  -- Replaces values in a text template
  -- Variable names can contain letters, numbers and underscores, are prefixed with `$` and may or not use curly braces.
  -- @param {string} text text containing variables
  -- @param {table} replacements key/value pairs for replacements
  -- @return {string} string containing replaced values
  -- @example
  -- local goop = lua_util.template("HELLO $FOO ${BAR}!", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
  -- -- goop contains "HELLO LUA WORLD!"
  --]]
  exports.template = function(tmpl, keys)
    -- Matches a literal and replaces it with an empty string
    local function strip(literal)
      return lpeg.P(literal) / ""
    end

    local name_char = lpeg.R("az", "AZ", "09") + lpeg.P("_")
    -- A variable name, substituted by looking it up in `keys`
    local lookup = (name_char^1) / keys
    local plain_var = strip("$") * lookup
    local braced_var = strip("${") * lookup * strip("}")

    -- Anything that is not a variable is copied through unchanged
    local template_grammar = lpeg.Cs((plain_var + braced_var + 1)^0)
    return template_grammar:match(tmpl)
  end
  -- Returns a shallow copy of `env` with the `rspamd_paths` and `rspamd_env`
  -- globals added as `paths` and `env`; the caller's table is left untouched
  local function enrich_template_with_globals(env)
    local enriched = exports.shallowcopy(env)
    enriched.env = rspamd_env
    enriched.paths = rspamd_paths
    return enriched
  end
  --[[[
  -- @function lua_util.jinja_template(text, env[, skip_global_env])
  -- Replaces values in a text template according to jinja2 syntax
  -- @param {string} text text containing variables
  -- @param {table} env key/value pairs for replacements
  -- @param {boolean} skip_global_env don't export Rspamd superglobals
  -- @return {string} string containing replaced values
  -- @example
  -- -- NOTE(review): this module configures lupa with `{=`/`=}` instead of
  -- -- the default `{{`/`}}`; presumably these are the variable delimiters
  -- lua_util.jinja_template("HELLO {=FOO=} {=BAR=}!", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
  -- "HELLO LUA WORLD!"
  --]]
  exports.jinja_template = function(text, env, skip_global_env)
    local template_env = env
    if not skip_global_env then
      template_env = enrich_template_with_globals(env)
    end
    return lupa.expand(text, template_env)
  end
  --[[[
  -- @function lua_util.jinja_template_file(filename, env[, skip_global_env])
  -- Expands a template file according to jinja2 syntax
  -- @param {string} filename name of file to expand
  -- @param {table} env key/value pairs for replacements
  -- @param {boolean} skip_global_env don't export Rspamd superglobals
  -- @return {string} string containing replaced values
  -- @example
  -- lua_util.jinja_template_file("/path/to/template", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
  --]]
  exports.jinja_template_file = function(filename, env, skip_global_env)
    local template_env = env
    if not skip_global_env then
      template_env = enrich_template_with_globals(env)
    end
    return lupa.expand_file(filename, template_env)
  end
  --[[[
  -- @function lua_util.remove_email_aliases(email_addr)
  -- Removes `+tag` aliases from the user part of an address. For gmail.com and
  -- googlemail.com dots are removed from the user part as well, and
  -- googlemail.com is rewritten to gmail.com.
  -- When something changes, the `user`, `domain`, `addr` and `raw` fields of
  -- `email_addr` are updated in place.
  -- @param {table} email_addr address table (`user`, `domain`, optional `name`)
  -- @return {string|nil} new user part, nil if the user part is unchanged
  -- @return {table|nil} parts of the removed `+...` tail as returned by
  -- str_split (the first element is an empty string)
  --]]
  exports.remove_email_aliases = function(email_addr)
    -- `user+tag1+tag2`: captures the user part and the whole `+...` tail
    local plus_alias_pattern = '^([^%+][^%+]*)(%+.*)$'

    local function check_gmail_user(addr)
      -- Nothing to normalise without a user part (gsub would fail on nil)
      if not addr.user then
        return nil
      end
      -- Remove all points
      local no_dots_user = string.gsub(addr.user, '%.', '')
      local cap, pluses = string.match(no_dots_user, plus_alias_pattern)
      if cap then
        return cap, rspamd_str_split(pluses, '+'), nil
      elseif no_dots_user ~= addr.user then
        return no_dots_user, {}, nil
      end
      return nil
    end

    local function check_address(addr)
      if addr.user then
        local cap, pluses = string.match(addr.user, plus_alias_pattern)
        if cap then
          return cap, rspamd_str_split(pluses, '+'), nil
        end
      end
      return nil
    end

    -- Applies the new user/domain and rebuilds the derived `addr`/`raw` fields
    local function set_addr(addr, new_user, new_domain)
      if new_user then
        addr.user = new_user
      end
      if new_domain then
        addr.domain = new_domain
      end

      -- The user part may be missing when only the domain was rewritten
      local user = addr.user or ''
      if addr.domain then
        addr.addr = string.format('%s@%s', user, addr.domain)
      else
        addr.addr = string.format('%s@', user)
      end

      if addr.name and #addr.name > 0 then
        addr.raw = string.format('"%s" <%s>', addr.name, addr.addr)
      else
        addr.raw = string.format('<%s>', addr.addr)
      end
    end

    -- googlemail.com is always rewritten to gmail.com, even if the user part
    -- needs no changes
    local function check_googlemail(addr)
      local nd = 'gmail.com'
      local nu, tags = check_gmail_user(addr)
      if nu then
        return nu, tags, nd
      end
      return nil, nil, nd
    end

    local specific_domains = {
      ['gmail.com'] = check_gmail_user,
      ['googlemail.com'] = check_googlemail,
    }

    if not email_addr then
      return nil
    end

    local checker = (email_addr.domain and specific_domains[email_addr.domain])
        or check_address
    local nu, tags, nd = checker(email_addr)
    if nu or nd then
      set_addr(email_addr, nu, nd)
      return nu, tags
    end

    return nil
  end
  -- Returns true when the request carries a `Password` header or its
  -- `User-Agent` header is exactly `rspamc`, false otherwise
  exports.is_rspamc_or_controller = function(task)
    local ua = task:get_request_header('User-Agent') or ''
    local pwd = task:get_request_header('Password')

    if tostring(ua) == 'rspamc' or pwd then
      return true
    end
    return false
  end
  --[[[
  -- @function lua_util.unpack(table)
  -- Converts numeric table to varargs
  -- This is `unpack` on Lua 5.1/5.2/LuaJIT and `table.unpack` on Lua 5.3
  -- @param {table} table numerically indexed table to unpack
  -- @return {varargs} unpacked table elements
  --]]
  local unpack_function = table.unpack
  if not unpack_function then
    -- Older interpreters only provide the global `unpack`
    unpack_function = unpack
  end

  exports.unpack = function(t)
    return unpack_function(t)
  end
  --[[[
  -- @function lua_util.flatten(table)
  -- Flatten underlying tables in a single table (one level deep)
  -- @param {table} table table of tables
  -- @return {table} flattened table
  --]]
  exports.flatten = function(t)
    local flat = {}

    for _, inner in fun.iter(t) do
      for _, item in fun.iter(inner) do
        flat[#flat + 1] = item
      end
    end

    return flat
  end
  --[[[
  -- @function lua_util.spairs(table[, order[, lim]])
  -- Like `pairs` but keys are visited in sorted order
  -- @param {table} table table containing key/value pairs
  -- @param {function} order optional comparator called as `order(table, a, b)`
  -- for keys `a` and `b`; the default `table.sort` order is used when omitted
  -- @param {number} lim optional maximum number of pairs to return
  -- @return {function} generator function returning key/value pairs
  -- @example
  -- for k, v in spairs(t) do ... end
  -- -- sorted by value:
  -- for k, v in spairs(t, function(t, a, b) return t[a] < t[b] end) do ... end
  --]]
  local function spairs(t, order, lim)
    -- Snapshot of the keys, sorted up front
    local keys = {}
    for k in pairs(t) do
      keys[#keys + 1] = k
    end

    local cmp
    if order then
      -- The custom order also gets the table, so it can compare by value
      cmp = function(a, b) return order(t, a, b) end
    end
    table.sort(keys, cmp)

    local pos = 0
    return function()
      pos = pos + 1
      if lim and pos > lim then
        return
      end
      local key = keys[pos]
      if key then
        return key, t[key]
      end
    end
  end
  314. exports.spairs = spairs
  --[[[
  -- @function lua_util.disable_module(modname, how)
  -- Disables a plugin
  -- @param {string} modname name of plugin to disable
  -- @param {string} how 'redis' to disable redis, 'config' to disable startup,
  -- 'experimental' for experimental plugins; anything else marks the plugin as failed
  --]]
  local function disable_module(modname, how)
    -- Maps the reason to the matching table in `rspamd_plugins_state`
    local state_tables = {
      redis = 'disabled_redis',
      config = 'disabled_unconfigured',
      experimental = 'disabled_experimental',
    }

    if rspamd_plugins_state.enabled[modname] then
      rspamd_plugins_state.enabled[modname] = nil
    end

    local target = state_tables[how] or 'disabled_failed'
    rspamd_plugins_state[target][modname] = {}
  end
  335. exports.disable_module = disable_module
  --[[[
  -- @function lua_util.check_experimental(modname)
  -- Checks experimental plugins state and disable if needed
  -- @param {string} modname name of plugin to check
  -- @return {boolean} true if plugin should be enabled, false otherwise
  --]]
  local function check_experimental(modname)
    if not rspamd_config:experimental_enabled() then
      -- Experimental plugins are switched off: record this plugin as disabled
      disable_module(modname, 'experimental')
      return false
    end

    return true
  end
  350. exports.check_experimental = check_experimental
  --[[[
  -- @function lua_util.list_to_hash(list)
  -- Converts numerically-indexed table to table indexed by values
  -- A table without a first element is returned as is; values that are
  -- neither tables nor strings yield no result
  -- @param {table} list numerically-indexed table or string, which is treated as a one-element list
  -- @return {table} table indexed by values
  -- @example
  -- local h = lua_util.list_to_hash({"a", "b"})
  -- -- h contains {a = true, b = true}
  --]]
  local function list_to_hash(list)
    local kind = type(list)

    if kind == 'string' then
      return { [list] = true }
    end

    if kind == 'table' then
      if not list[1] then
        -- Not a list: hand back the same table
        return list
      end

      local hash = {}
      for _, elt in ipairs(list) do
        hash[elt] = true
      end
      return hash
    end
  end
  377. exports.list_to_hash = list_to_hash
  --[[[
  -- @function lua_util.nkeys(table|gen, param, state)
  -- Returns number of keys in a table (i.e. from both the array and hash parts combined)
  -- When `param` is given, the arguments are treated as an iterator triplet
  -- and the elements produced by `fun.iter(gen, param, state)` are counted
  -- @param {table|function} gen table to count keys in, or iterator generator
  -- @param {any} param optional iterator parameter
  -- @param {any} state optional iterator state
  -- @return {number} number of keys
  -- @example
  -- print(lua_util.nkeys({})) -- 0
  -- print(lua_util.nkeys({ "a", nil, "b" })) -- 2
  -- print(lua_util.nkeys({ dog = 3, cat = 4, bird = nil })) -- 2
  -- print(lua_util.nkeys({ "a", dog = 3, cat = 4 })) -- 3
  --
  --]]
  local function nkeys(gen, param, state)
    local count = 0

    if param then
      for _ in fun.iter(gen, param, state) do
        count = count + 1
      end
    else
      for _ in pairs(gen) do
        count = count + 1
      end
    end

    return count
  end
  399. exports.nkeys = nkeys
  --[[[
  -- @function lua_util.parse_time_interval(str)
  -- Parses human readable time interval
  -- Accepts 's' for seconds, 'm' for minutes, 'h' for hours, 'd' for days,
  -- 'w' for weeks, 'y' for years
  -- @param {string} str input string
  -- @return {number|nil} parsed interval as seconds (might be fractional)
  --]]
  local function parse_time_interval(str)
    -- Seconds per unit suffix
    local unit_seconds = {
      s = 1,
      m = 60,
      h = 3600,
      d = 86400,
      w = 86400 * 7,
      y = 365 * 86400,
    }
    local function suffix_to_seconds(sfx)
      return unit_seconds[sfx]
    end

    local digits = lpeg.R("09")^1
    local sign = lpeg.S("+-")
    local fraction = lpeg.P(".") * digits
    -- Either `[sign]digits[.digits]` or `sign.digits`
    local number = (sign^-1 * digits * fraction^-1) + (sign * fraction)
    local unit = lpeg.S("smhdwy") / suffix_to_seconds

    -- Fold `1 * number [* unit multiplier]` into a single value
    local interval = lpeg.Cf(
        lpeg.Cc(1) * (number / tonumber) * unit^-1,
        function(acc, val) return acc * val end)

    return (lpeg.match(interval, str))
  end

  exports.parse_time_interval = parse_time_interval
  --[[[
  -- @function lua_util.dehumanize_number(str)
  -- Parses human readable number
  -- Accepts 'k' for thousands, 'm' for millions, 'g' for billions, 'b' suffix for 1024 multiplier,
  -- e.g. `10mb` equal to `10 * 1024 * 1024`
  -- @param {string} str input string
  -- @return {number|nil} parsed number
  --]]
  local function dehumanize_number(str)
    -- Decimal multipliers for bare suffixes, binary ones for the `b` forms
    local multipliers = {
      k = 1000,
      m = 1000000,
      g = 1e9,
      kb = 1024,
      mb = 1024 * 1024,
      gb = 1024 * 1024 * 1024,
    }
    local function parse_suffix(s)
      return multipliers[s]
    end

    local digit = lpeg.R("09")
    local parser = {}
    parser.integer =
    (lpeg.S("+-") ^ -1) *
        (digit ^ 1)
    parser.fractional =
    (lpeg.P(".") ) *
        (digit ^ 1)
    -- Either `[sign]digits[.digits]` or `sign.digits`
    parser.number =
    (parser.integer *
        (parser.fractional ^ -1)) +
        (lpeg.S("+-") * parser.fractional)
    -- Fold `1 * number [* suffix multiplier]` into a single value
    parser.humanized_number = lpeg.Cf(lpeg.Cc(1) *
        (parser.number / tonumber) *
        (((lpeg.S("kmg") * (lpeg.P("b") ^ -1)) / parse_suffix) ^ -1),
        function (acc, val) return acc * val end)

    local t = lpeg.match(parser.humanized_number, str)

    return t
  end

  exports.dehumanize_number = dehumanize_number
  --[[[
  -- @function lua_util.table_cmp(t1, t2)
  -- Compare two tables deeply
  -- Non-table values are compared with `==`. Keys that are tables themselves
  -- are matched structurally. Self-referencing tables are supported.
  -- @param {any} t1 first value
  -- @param {any} t2 second value
  -- @return {boolean} true if both values are structurally equal
  --]]
  local function table_cmp(table1, table2)
    -- Maps every table of the first argument that is being compared to its
    -- counterpart, so that cycles do not recurse forever
    local avoid_loops = {}

    local function recurse(t1, t2)
      local kind = type(t1)
      if kind ~= type(t2) then return false end
      if kind ~= "table" then return t1 == t2 end

      local paired = avoid_loops[t1]
      if paired then return paired == t2 end
      avoid_loops[t1] = t2

      -- Keys of t2 that have no match in t1 yet; table-typed keys are also
      -- listed separately since they cannot be looked up directly
      local t2keys = {}
      local t2tablekeys = {}
      for k in pairs(t2) do
        if type(k) == "table" then
          t2tablekeys[#t2tablekeys + 1] = k
        end
        t2keys[k] = true
      end

      for k1, v1 in pairs(t1) do
        if type(k1) == "table" then
          -- if key is a table, we need to find an equivalent one.
          local ok = false
          for i, tk in ipairs(t2tablekeys) do
            if table_cmp(k1, tk) and recurse(v1, t2[tk]) then
              table.remove(t2tablekeys, i)
              t2keys[tk] = nil
              ok = true
              break
            end
          end
          if not ok then return false end
        else
          local v2 = t2[k1]
          -- t1 has a key which t2 doesn't have, fail.
          if v2 == nil then return false end
          t2keys[k1] = nil
          if not recurse(v1, v2) then return false end
        end
      end

      -- if t2 has a key which t1 doesn't have, fail. Compare with nil
      -- explicitly: a leftover key may be `false`
      return next(t2keys) == nil
    end

    return recurse(table1, table2)
  end
  534. exports.table_cmp = table_cmp
  --[[[
  -- @function lua_util.fold_header(task, name, value, stop_chars)
  -- Performs header folding
  -- Tasks with the `milter` flag are folded with "lf", all other tasks with
  -- the value returned by `task:get_newlines_type()`
  -- @param {task} task task object
  -- @param {string} name header name
  -- @param {string} value header value
  -- @param {string} stop_chars passed through to `rspamd_util.fold_header`
  -- @return result of `rspamd_util.fold_header`
  --]]
  exports.fold_header = function(task, name, value, stop_chars)
    local how = task:has_flag("milter") and "lf" or task:get_newlines_type()

    return rspamd_util.fold_header(name, value, how, stop_chars)
  end
  --[[[
  -- @function lua_util.override_defaults(defaults, override)
  -- Overrides values from defaults with override
  -- Nested tables present on both sides are merged recursively. A new table
  -- is returned unless one of the arguments is not a table.
  -- @param {table} defaults default values
  -- @param {table} override values that take precedence
  -- @return {table} merged table
  --]]
  local function override_defaults(def, override)
    -- Corner cases: with only one usable table there is nothing to merge
    if type(override) ~= 'table' then
      return def
    end
    if type(def) ~= 'table' then
      return override
    end

    -- Start from the defaults...
    local merged = {}
    for k, v in pairs(def) do
      merged[k] = v
    end

    -- ...and lay the overrides on top, descending where both sides are tables
    for k, v in pairs(override) do
      if type(v) == 'table' and type(def[k]) == 'table' then
        merged[k] = override_defaults(def[k], v)
      else
        merged[k] = v
      end
    end

    return merged
  end
  580. exports.override_defaults = override_defaults
  --[[[
  -- @function lua_util.filter_specific_urls(urls, params)
  -- params: {
  -- - task - if needed to save in the cache
  -- - limit <int> (default = 9999)
  -- - esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain)
  --   works only if number of unique eSLD less than `limit`
  -- - need_emails <bool> (default = false)
  -- - need_images <bool> (default = false) urls flagged as images are skipped unless set
  -- - ignore_ip <bool> (default = false) skip urls flagged as numeric
  -- - ignore_redirected <bool> (default = false) replace redirecting urls by their target
  -- - no_cache <bool> (default = false) neither read nor write the task cache
  -- - filter <callback> (default = nil)
  -- - prefix <string> cache prefix (default = nil)
  -- }
  -- Apply heuristic in extracting of urls from `urls` table, this function
  -- tries its best to extract specific number of urls from a task based on
  -- their characteristics
  -- @return {table} list of selected urls (empty if `urls` is nil)
  --]]
  exports.filter_specific_urls = function (urls, params)
    -- Apply the documented defaults locally, the caller's table is not modified
    local limit = params.limit or 9999
    local esld_limit = params.esld_limit or 9999
    local use_cache = params.task and not params.no_cache
    local cache_key

    if use_cache then
      if params.prefix then
        cache_key = params.prefix
      else
        cache_key = string.format('sp_urls_%d%s%s%s', limit,
            tostring(params.need_emails or false),
            tostring(params.need_images or false),
            tostring(params.need_content or false))
      end
      local cached = params.task:cache_get(cache_key)

      if cached then
        return cached
      end
    end

    if not urls then return {} end

    if params.filter then urls = fun.totable(fun.filter(params.filter, urls)) end

    -- Candidates grouped by tld and by eSLD; every candidate is a triplet
    -- {string form of url, url, priority}
    local tlds = {}
    local eslds = {}

    -- Selected urls, keyed by their string form
    local res = {}
    local nres = 0

    local function insert_url(str, u)
      if not res[str] then
        res[str] = u
        nres = nres + 1

        return true
      end

      return false
    end

    -- Converts the selection to a list and stores it in the task cache
    local function finish()
      local selected = exports.values(res)
      if use_cache then
        params.task:cache_set(cache_key, selected)
      end
      return selected
    end

    local function process_single_url(u, default_priority)
      local priority = default_priority or 1 -- Normal priority
      local flags = u:get_flags()
      if params.ignore_ip and flags.numeric then
        return
      end

      if flags.redirected then
        local redir = u:get_redirected() -- get the real url

        if params.ignore_redirected then
          -- Replace `u` with redir
          u = redir
          priority = 2
        else
          -- Process both redirected url and the original one
          process_single_url(redir, 2)
        end
      end

      if flags.image then
        if not params.need_images then
          -- Ignore url
          return
        else
          -- Penalise images in urls
          priority = 0
        end
      end

      local esld = u:get_tld()
      local str_hash = tostring(u)

      if esld then
        -- Special cases
        if (u:get_protocol() ~= 'mailto') and (not flags.html_displayed) then
          if flags.obscured then
            priority = 3
          else
            if (flags.has_user or flags.has_port) then
              priority = 2
            elseif (flags.subject or flags.phished) then
              priority = 2
            end
          end
        elseif flags.html_displayed then
          priority = 0
        end

        if not eslds[esld] then
          eslds[esld] = {{str_hash, u, priority}}
        else
          if #eslds[esld] < esld_limit then
            table.insert(eslds[esld], {str_hash, u, priority})
          end
        end

        -- eSLD - 1 part => tld
        local parts = rspamd_str_split(esld, '.')
        local tld = table.concat(fun.totable(fun.tail(parts)), '.')

        if not tlds[tld] then
          tlds[tld] = {{str_hash, u, priority}}
        else
          table.insert(tlds[tld], {str_hash, u, priority})
        end
      end
    end

    for _,u in ipairs(urls) do
      process_single_url(u)
    end

    limit = limit - nres
    if limit < 0 then limit = 0 end

    if limit == 0 then
      return finish()
    end

    -- Sort eSLDs and tlds
    local function sort_stuff(tbl)
      -- Sort according to max priority
      table.sort(tbl, function(e1, e2)
        -- Sort by priority so max priority is at the end
        table.sort(e1, function(tr1, tr2)
          return tr1[3] < tr2[3]
        end)
        table.sort(e2, function(tr1, tr2)
          return tr1[3] < tr2[3]
        end)

        if e1[#e1][3] ~= e2[#e2][3] then
          -- Sort by priority so max priority is at the beginning
          return e1[#e1][3] > e2[#e2][3]
        else
          -- Prefer less urls to more urls per esld
          return #e1 < #e2
        end
      end)

      return tbl
    end

    eslds = sort_stuff(exports.values(eslds))

    if #eslds <= limit then
      -- Number of eslds < limit: take urls from every eSLD in turn, best
      -- priority first, until the limit is reached or nothing is left
      repeat
        local item_found = false

        for _,lurls in ipairs(eslds) do
          if #lurls > 0 then
            local last = table.remove(lurls)
            insert_url(last[1], last[2])
            limit = limit - 1
            item_found = true
          end
        end
      until limit <= 0 or not item_found

      return finish()
    end

    tlds = sort_stuff(exports.values(tlds))

    -- More eSLDs than the limit: pick urls tld by tld instead.
    -- NOTE(review): this loop relies on the tld lists holding at least
    -- `limit` urls, which appears to follow from #eslds > limit -- confirm
    while limit > 0 do
      for _,lurls in ipairs(tlds) do
        if #lurls > 0 then
          local last = table.remove(lurls)
          insert_url(last[1], last[2])
          limit = limit - 1
        end
        if limit == 0 then break end
      end
    end

    return finish()
  end
  --[[[
  -- @function lua_util.extract_specific_urls(params)
  -- params: {
  -- - task
  -- - limit <int> (default = 9999)
  -- - esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain)
  --   works only if number of unique eSLD less than `limit`
  -- - need_emails <bool> (default = false)
  -- - filter <callback> (default = nil)
  -- - prefix <string> cache prefix (default = nil)
  -- - ignore_ip <bool> (default = false)
  -- - ignore_redirected <bool> (default = false)
  -- - need_images <bool> (default = false)
  -- - need_content <bool> (default = false)
  -- - no_cache <bool> (default = false)
  -- }
  -- Apply heuristic in extracting of urls from task, this function
  -- tries its best to extract specific number of urls from a task based on
  -- their characteristics
  -- The deprecated positional form `(task, limit, need_emails, filter, prefix)`
  -- is still accepted. Missing defaults are written into the `params` table.
  --]]
  exports.extract_specific_urls = function(params_or_task, lim, need_emails, filter, prefix)
    local params

    if type(params_or_task) == 'table' and type(lim) == 'nil' then
      params = params_or_task
    else
      -- Deprecated call
      params = {
        task = params_or_task,
        limit = lim,
        need_emails = need_emails,
        filter = filter,
        prefix = prefix
      }
    end

    -- `filter` and `prefix` default to nil and therefore need no entry here
    local default_params = {
      limit = 9999,
      esld_limit = 9999,
      need_emails = false,
      need_images = false,
      need_content = false,
      ignore_ip = false,
      ignore_redirected = false,
      no_cache = false,
    }
    for name, default in pairs(default_params) do
      if params[name] == nil then
        params[name] = default
      end
    end

    -- Shortcut for cached stuff
    if params.task and not params.no_cache then
      local cache_key = params.prefix or string.format('sp_urls_%d%s%s%s',
          params.limit,
          tostring(params.need_emails or false),
          tostring(params.need_images or false),
          tostring(params.need_content or false))

      local cached = params.task:cache_get(cache_key)
      if cached then
        return cached
      end
    end

    -- No cache version
    local urls = params.task:get_urls({
      emails = params.need_emails,
      images = params.need_images,
      content = params.need_content,
    })

    return exports.filter_specific_urls(urls, params)
  end
  --[[[
  -- @function lua_util.deepcopy(table)
  -- params: {
  -- - table
  -- }
  -- Performs deep copy of the table. Including metatables.
  -- Keys, values and metatables are copied recursively; non-table values are
  -- returned as is. Every source table is copied exactly once per call, so
  -- self-referencing tables (including the usual `Class.__index = Class`
  -- metatable pattern) are handled and shared sub-tables stay shared in the copy.
  --]]
  local function deepcopy(orig)
    -- Maps already visited source tables to their copies (cycle protection)
    local seen = {}

    local function copy_value(value)
      if type(value) ~= 'table' then
        -- number, string, boolean, etc
        return value
      end

      local known = seen[value]
      if known ~= nil then
        return known
      end

      local copy = {}
      -- Register before descending so that back-references resolve to `copy`
      seen[value] = copy

      for orig_key, orig_value in next, value, nil do
        copy[copy_value(orig_key)] = copy_value(orig_value)
      end

      local mt = getmetatable(value)
      -- A protected metatable may expose a non-table `__metatable` value,
      -- which setmetatable would reject
      if type(mt) == 'table' then
        setmetatable(copy, copy_value(mt))
      end

      return copy
    end

    return copy_value(orig)
  end
  860. exports.deepcopy = deepcopy
  --[[[
  -- @function lua_util.deepsort(table, sort_func)
  -- params: {
  -- - table
  -- - sort_func <callback> optional comparator passed to `table.sort`
  -- }
  -- Performs recursive in-place sort of a table: the table itself is sorted
  -- with `table.sort`, then every table found among its values is sorted the
  -- same way using the same comparator. Non-table arguments are ignored.
  --]]
  local function deepsort(tbl, sort_func)
    if type(tbl) ~= 'table' then
      return
    end

    table.sort(tbl, sort_func)

    for _, nested in next, tbl, nil do
      -- Propagate the comparator, otherwise nested tables would silently
      -- fall back to the default `<` ordering
      deepsort(nested, sort_func)
    end
  end
  877. exports.deepsort = deepsort
  --[[[
  -- @function lua_util.shallowcopy(tbl)
  -- Performs shallow (and fast) copy of a table or another Lua type.
  -- For a table a new table with the same key/value pairs is returned (nested
  -- tables are shared with the original and the metatable is not copied);
  -- any other value is returned unchanged.
  --]]
  function exports.shallowcopy(orig)
    if type(orig) ~= 'table' then
      return orig
    end

    local duplicate = {}
    for key, value in pairs(orig) do
      duplicate[key] = value
    end

    return duplicate
  end
  -- Debugging support
  -- `unconditional_debug`: when true, `debugm` logs for every module
  -- `debug_modules`: set of module names (and enabled aliases) with debug enabled
  -- `debug_aliases`: map alias name -> real module name
  local unconditional_debug = false
  local debug_modules = {}
  local debug_aliases = {}
  local log_level = 384 -- debug + forced (1 << 7 | 1 << 8)

  --[[[
  -- @function lua_util.init_debug_logging(config)
  -- Reads the `logging` section of the config and fills the per-module debug
  -- state: `level = "debug"` enables debug for all modules, otherwise every
  -- entry of `debug_modules` is enabled individually together with all
  -- aliases that were registered for it.
  -- @param {rspamd_config} config configuration object
  --]]
  exports.init_debug_logging = function(config)
    local logger = require "rspamd_logger"
    -- Fill debug modules from the config
    local logging = config:get_all_opt('logging')

    if not logging then
      return
    end

    if logging.level == 'debug' then
      unconditional_debug = true
    end

    if unconditional_debug then
      -- Everything is logged anyway, no need for per-module bookkeeping
      return
    end

    local modules = logging.debug_modules
    if type(modules) == 'string' then
      -- Tolerate a single module given as a plain string
      modules = { modules }
    end

    if modules then
      for _, m in ipairs(modules) do
        debug_modules[m] = true
        logger.infox(config, 'enable debug for Lua module %s', m)
      end
    end

    -- `debug_aliases` is keyed by alias name, so its length (`#`) is always 0;
    -- iterate it unconditionally instead of guarding on the length
    for alias, mod in pairs(debug_aliases) do
      if debug_modules[mod] then
        debug_modules[alias] = true
        logger.infox(config, 'enable debug for Lua module %s (%s aliased)',
            alias, mod)
      end
    end
  end
  --[[[
  -- @function lua_util.enable_debug_logging()
  -- Turns on debug logging for all modules regardless of per-module settings
  --]]
  function exports.enable_debug_logging()
    unconditional_debug = true
  end
  --[[[
  -- @function lua_util.disable_debug_logging()
  -- Turns off the unconditional debug logging; modules enabled individually
  -- keep their debug state
  --]]
  function exports.disable_debug_logging()
    unconditional_debug = false
  end
  --[[[
  -- @function lua_util.debugm(module, [log_object], format, ...)
  -- Performs fast debug log for a specific module.
  -- Nothing is logged unless debug is enabled unconditionally or for `module`.
  -- If the second argument is a string it is treated as the format string and
  -- an empty log object is used; otherwise it is passed as the log object and
  -- the third argument is the format string.
  --]]
  function exports.debugm(mod, obj_or_fmt, fmt_or_something, ...)
    local logger = require "rspamd_logger"

    if not (unconditional_debug or debug_modules[mod]) then
      return
    end

    -- NOTE(review): the literal 2 is presumably the stack level used to
    -- attribute the message to the caller of debugm - confirm against logx
    if type(obj_or_fmt) == 'string' then
      -- No log object supplied: shift arguments by one
      logger.logx(log_level, mod, '', 2, obj_or_fmt, fmt_or_something, ...)
    else
      logger.logx(log_level, mod, obj_or_fmt, 2, fmt_or_something, ...)
    end
  end
  --[[[
  -- @function lua_util.add_debug_alias(mod, alias)
  -- Add debugging alias so logging to `alias` will be treated as logging to `mod`.
  -- The alias is always remembered; if debug is already enabled for `mod`
  -- the alias is enabled immediately as well.
  --]]
  function exports.add_debug_alias(mod, alias)
    local logger = require "rspamd_logger"

    debug_aliases[alias] = mod

    if not debug_modules[mod] then
      return
    end

    debug_modules[alias] = true
    logger.infox(rspamd_config, 'enable debug for Lua module %s (%s aliased)',
        alias, mod)
  end
  --[[[
  -- @function lua_util.get_task_verdict(task)
  -- Returns verdict for a task + score if certain, must be called from idempotent filters only.
  -- This is a thin wrapper that delegates to `lua_verdict.get_default_verdict(task)`.
  -- Returns string:
  -- * `spam`: if message have over reject threshold and has more than one positive rule
  -- * `junk`: if a message has score between [add_header/rewrite subject] and reject thresholds and has more than two positive rules
  -- * `passthrough`: if a message has been passed through some short-circuit rule
  -- * `ham`: if a message has overall score below junk level **and** more than three negative rules, or negative total score
  -- * `uncertain`: all other cases
  --]]
  function exports.get_task_verdict(task)
    local verdict_lib = require "lua_verdict"

    return verdict_lib.get_default_verdict(task)
  end
  --[[[
  -- @function lua_util.maybe_obfuscate_string(subject, settings, prefix)
  -- Obfuscate string if enabled in settings. Also checks utf8 validity - if
  -- string is not valid utf8 then '???' is returned. Empty string (or nil) is returned as is.
  -- Supported settings:
  -- * <prefix>_privacy = false - subject privacy is off
  -- * <prefix>_privacy_alg = 'blake2' - default hash-algorithm to obfuscate subject
  -- * <prefix>_privacy_prefix = 'obf' - prefix to show it's obfuscated
  -- * <prefix>_privacy_length = 16 - cut the length of the hash; if 0 or false full hash is returned
  -- @return obfuscated or validated subject
  --]]
  function exports.maybe_obfuscate_string(subject, settings, prefix)
    local hash = require 'rspamd_cryptobox_hash'

    if not subject or subject == '' then
      return subject
    end

    if not rspamd_util.is_valid_utf8(subject) then
      return '???'
    end

    if not settings[prefix .. '_privacy'] then
      -- Privacy is off: valid subject goes through untouched
      return subject
    end

    local algorithm = settings[prefix .. '_privacy_alg'] or 'blake2'
    local digest = hash.create_specific(algorithm, subject)

    local max_len = settings[prefix .. '_privacy_length']
    local truncate = max_len and max_len > 0
    local obfuscated = digest:hex()
    if truncate then
      obfuscated = obfuscated:sub(1, max_len)
    end

    local label = settings[prefix .. '_privacy_prefix']
    if label and #label > 0 then
      obfuscated = label .. ':' .. obfuscated
    end

    return obfuscated
  end
  --[[[
  -- @function lua_util.callback_from_string(str)
  -- Converts a string like `return function(...) end` to lua function and return true and this function
  -- or returns false + error message.
  -- Accepted forms (after trimming):
  -- * `return function ...` - evaluated directly
  -- * `function(...) ... end` - prefixed with `return`
  -- * anything else - wrapped as the body of `function(...)`
  -- @param {string} str source of the callback
  -- @return status code and function object or an error message
  --]]
  exports.callback_from_string = function(s)
    local loadstring = loadstring or load

    if not s or #s == 0 then
      return false,'invalid or empty string'
    end

    s = exports.rspamd_str_trim(s)
    local inp

    if s:match('^return%s*function') then
      -- 'return function', can be evaluated directly
      inp = s
    elseif s:match('^function%s*%(') then
      inp = 'return ' .. s
    else
      -- Just a plain sequence
      inp = 'return function(...)\n' .. s .. '; end'
    end

    -- Compile first: on a syntax error loadstring returns nil + message, and
    -- feeding that straight into pcall would replace the real message with
    -- "attempt to call a nil value"
    local chunk, compile_err = loadstring(inp)
    if not chunk then
      return false,compile_err
    end

    local ok, res_or_err = pcall(chunk)
    if not ok then
      return false,res_or_err
    end

    if type(res_or_err) ~= 'function' then
      return false,string.format('expected a function, got %s', type(res_or_err))
    end

    return true,res_or_err
  end
  --[[[
  -- @function lua_util.keys(t)
  -- Returns all keys from a specific table
  -- @param {table} t input table (or iterator triplet)
  -- @return array of keys
  -- When a second argument is given, the three arguments are handed to
  -- `fun.iter` (`fun` comes from the file scope) and the first value of every
  -- iteration step is collected; otherwise the table is walked with `pairs`.
  --]]
  function exports.keys(gen, param, state)
    local collected = {}
    local n = 0

    if param then
      -- Iterator triplet form
      for key in fun.iter(gen, param, state) do
        n = n + 1
        collected[n] = key
      end
    else
      -- Plain table form
      for key in pairs(gen) do
        n = n + 1
        collected[n] = key
      end
    end

    return collected
  end
  --[[[
  -- @function lua_util.values(t)
  -- Returns all values from a specific table
  -- @param {table} t input table (or iterator triplet)
  -- @return array of values
  -- When a second argument is given, the three arguments are handed to
  -- `fun.iter` (`fun` comes from the file scope) and the second value of every
  -- iteration step is collected; otherwise the table is walked with `pairs`.
  --]]
  function exports.values(gen, param, state)
    local collected = {}
    -- Explicit slot counter: a step is counted even if its value is nil
    local n = 0

    if param then
      -- Iterator triplet form
      for _, value in fun.iter(gen, param, state) do
        n = n + 1
        collected[n] = value
      end
    else
      -- Plain table form
      for _, value in pairs(gen) do
        n = n + 1
        collected[n] = value
      end
    end

    return collected
  end
  --[[[
  -- @function lua_util.distance_sorted(t1, t2)
  -- Returns distance between two sorted tables t1 and t2
  -- @param {table} t1 input table
  -- @param {table} t2 input table
  -- @return distance between `t1` and `t2`
  -- Both arrays are walked with one cursor each for at most
  -- max(#t1, #t2) steps: equal heads advance both cursors, otherwise the
  -- cursor pointing to the smaller element advances and the distance grows
  -- by one. When one array is exhausted the length difference is added,
  -- corrected by how far the cursors have drifted apart.
  --]]
  function exports.distance_sorted(t1, t2)
    local len1, len2 = #t1, #t2
    local longest = math.max(len1, len2)
    local distance = 0
    local pos1, pos2 = 1, 1

    for _ = 1, longest do
      if pos2 > len2 then
        -- t2 is exhausted first
        distance = distance + longest - len2
        if pos1 > pos2 then
          distance = distance - (pos1 - pos2)
        end
        break
      elseif pos1 > len1 then
        -- t1 is exhausted first
        distance = distance + longest - len1
        if pos2 > pos1 then
          distance = distance - (pos2 - pos1)
        end
        break
      end

      local left, right = t1[pos1], t2[pos2]
      if left == right then
        pos1 = pos1 + 1
        pos2 = pos2 + 1
      elseif left < right then
        pos1 = pos1 + 1
        distance = distance + 1
      else
        pos2 = pos2 + 1
        distance = distance + 1
      end
    end

    return distance
  end
  --[[[
  -- @function lua_util.table_digest(t)
  -- Returns hash of all values if t[1] is set (array mode) or of keys/values otherwise (map mode)
  -- @param {table} t input array or map
  -- @return {string} base32 representation of the hash
  -- Array mode: elements are visited with `ipairs`; nested tables contribute
  -- their own digest, everything else is hashed via `tostring`.
  -- Map mode: every key is hashed via `tostring`; only string values and
  -- nested tables contribute, values of other types are skipped. Entries are
  -- visited in `pairs` order, which Lua leaves unspecified.
  --]]
  local function table_digest(t)
    local hash_lib = require "rspamd_cryptobox_hash"
    local state = hash_lib.create()

    if t[1] then
      -- Array mode
      for _, element in ipairs(t) do
        local piece = type(element) == 'table' and table_digest(element) or tostring(element)
        state:update(piece)
      end
    else
      -- Map mode
      for key, value in pairs(t) do
        state:update(tostring(key))

        local value_type = type(value)
        if value_type == 'table' then
          state:update(table_digest(value))
        elseif value_type == 'string' then
          state:update(value)
        end
      end
    end

    return state:base32()
  end
  1152. exports.table_digest = table_digest
  --[[[
  -- @function lua_util.toboolean(v)
  -- Converts a string or a number to boolean
  -- @param {string|number} v
  -- @return {boolean} v converted to boolean; on failure `false` and an error message
  -- Strings `1`, `true`, `TRUE`, `True` give true; `0`, `false`, `FALSE`,
  -- `False` give false. Numbers are true unless equal to 0. Any other input
  -- yields `false` plus an error message.
  --]]
  do
    -- Built once instead of on every call
    local known_strings = {
      ['1'] = true,
      ['true'] = true,
      ['TRUE'] = true,
      ['True'] = true,
      ['0'] = false,
      ['false'] = false,
      ['FALSE'] = false,
      ['False'] = false,
    }

    exports.toboolean = function(v)
      local v_type = type(v)

      if v_type == 'string' then
        local parsed = known_strings[v]
        if parsed ~= nil then
          return parsed
        end
      elseif v_type == 'number' then
        return v ~= 0
      end

      -- `%q` raises on non-string arguments in some Lua versions, which would
      -- turn a conversion failure into a runtime error; stringify first
      return false, string.format( 'cannot convert %q to boolean', tostring(v));
    end
  end
  --[[[
  -- @function lua_util.config_check_local_or_authed(config, modname, def_local, def_authed)
  -- Reads check_local and check_authed from the config as this is used in many modules.
  -- The section named `modname` is consulted first; the `options` section is
  -- read only if the module section defines neither of the two booleans.
  -- @param {rspamd_config} config `rspamd_config` global
  -- @param {name} module name
  -- @param def_local default for check_local (false if omitted)
  -- @param def_authed default for check_authed (false if omitted)
  -- @return {table} array `{check_local, check_authed}`
  --]]
  function exports.config_check_local_or_authed(rspamd_config, modname, def_local, def_authed)
    local flags = {
      check_local = def_local or false,
      check_authed = def_authed or false,
    }

    -- Copies boolean flags from a config section, reports whether any was found
    local function read_section(section)
      local opts = rspamd_config:get_all_opt(section)
      local found = false

      if type(opts) == 'table' then
        for _, name in ipairs({'check_local', 'check_authed'}) do
          if type(opts[name]) == 'boolean' then
            flags[name] = opts[name]
            found = true
          end
        end
      end

      return found
    end

    if not read_section(modname) then
      read_section('options')
    end

    return {flags.check_local, flags.check_authed}
  end
  --[[[
  -- @function lua_util.is_skip_local_or_authed(task, conf[, ip])
  -- Returns `true` if local or authenticated task should be skipped for this module
  -- @param {rspamd_task} task
  -- @param {table} conf table returned from `config_check_local_or_authed`
  --   (`{check_local, check_authed}`), defaults to `{false, false}`
  -- @param {rspamd_ip} ip optional ip address (can be obtained from a task)
  -- @return {boolean} true if check should be skipped
  --]]
  function exports.is_skip_local_or_authed(task, conf, ip)
    ip = ip or task:get_from_ip()
    conf = conf or {false, false}

    -- Authenticated user while checks for authed users are disabled
    if not conf[2] and task:get_user() then
      return true
    end

    -- Local address while checks for local addresses are disabled
    if not conf[1] and type(ip) == 'userdata' and ip:is_local() then
      return true
    end

    return false
  end
  1237. return exports