You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_util.lua 33KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298
  1. --[[
  2. Copyright (c) 2017, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[[
  14. -- @module lua_util
  15. -- This module contains utility functions for working with Lua and/or Rspamd
  16. --]]
  17. local exports = {}
  18. local lpeg = require 'lpeg'
  19. local rspamd_util = require "rspamd_util"
  20. local fun = require "fun"
  21. local lupa = require "lupa"
  -- Cache of compiled split grammars, keyed by the separator they were built for
  local split_grammar = {}
  -- Lazily built grammar used when no separator is given (split on whitespace)
  local spaces_split_grammar
  local space = lpeg.S(' \t\n\v\f\r')
  local nospace = 1 - space
  -- Skips leading whitespace and captures everything up to the last non-space character
  local ptrim = space^0 * lpeg.C((space^0 * nospace^1)^0)
  local match = lpeg.match

  -- Template delimiters: `{% %}`, `{= =}` and `{# #}`
  lupa.configure('{%', '%}', '{=', '=}', '{#', '#}', {
    autoescape = false,
    keep_trailing_newline = true,
  })

  -- Template filter that passes its input through rspamd_cryptobox.pbkdf
  function lupa.filters.pbkdf(s)
    return require("rspamd_cryptobox").pbkdf(s)
  end
  --[[[
  -- @function lua_util.str_split(text, delimiter)
  -- Splits text into a numeric table by delimiter
  -- @param {string} text delimited text
  -- @param {string} delimiter set of delimiter characters (an lpeg pattern is
  -- accepted as well); when omitted the text is split on whitespace
  -- @return {table} numeric table containing string parts
  --]]
  local function rspamd_str_split(s, sep)
    local grammar

    if sep then
      grammar = split_grammar[sep]

      if not grammar then
        -- A string is treated as a set of separator characters, anything
        -- else is assumed to be a ready-made lpeg pattern
        local sep_patt = sep
        if type(sep) == 'string' then
          sep_patt = lpeg.S(sep)
        end

        local piece = lpeg.C((1 - sep_patt)^0)
        grammar = lpeg.Ct(piece * (sep_patt * piece)^0)
        -- Remember the compiled grammar for subsequent calls
        split_grammar[sep] = grammar
      end
    else
      if not spaces_split_grammar then
        local piece = lpeg.C((1 - space)^0)
        spaces_split_grammar = lpeg.Ct(piece * (space * piece)^0)
      end

      grammar = spaces_split_grammar
    end

    return grammar:match(s)
  end

  exports.rspamd_str_split = rspamd_str_split
  exports.str_split = rspamd_str_split
  --[[[
  -- @function lua_util.str_trim(text)
  -- Returns a copy of the string without leading and trailing whitespace
  -- @param {string} text input text
  -- @return {string} trimmed string
  --]]
  local function rspamd_str_trim(s)
    local trimmed = match(ptrim, s)
    return trimmed
  end

  exports.rspamd_str_trim = rspamd_str_trim
  exports.str_trim = rspamd_str_trim
  --[[[
  -- @function lua_util.round(number, decimalPlaces)
  -- Round number to fixed number of decimal points (halves are rounded up)
  -- @param {number} number number to round
  -- @param {number} decimalPlaces number of decimal points (default 0)
  -- @return {number} rounded number
  --]]
  -- Robert Jay Gould http://lua-users.org/wiki/SimpleRound
  exports.round = function(num, numDecimalPlaces)
    local mult = 10^(numDecimalPlaces or 0)
    -- Adding 0.5 before flooring rounds to the nearest value; a bare floor
    -- would merely truncate towards minus infinity (1.9 -> 1)
    return math.floor(num * mult + 0.5) / mult
  end
  --[[[
  -- @function lua_util.template(text, replacements)
  -- Replaces values in a text template
  -- Variable names can contain letters, numbers and underscores, are prefixed
  -- with `$` and may be written either as `$NAME` or as `${NAME}`.
  -- @param {string} text text containing variables
  -- @param {table} replacements key/value pairs for replacements
  -- @return {string} string containing replaced values
  -- @example
  -- local goop = lua_util.template("HELLO $FOO ${BAR}!", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
  -- -- goop contains "HELLO LUA WORLD!"
  --]]
  function exports.template(tmpl, keys)
    local P, R = lpeg.P, lpeg.R

    -- A single character allowed in a variable name
    local name_char = P { R("az") + R("AZ") + R("09") + "_" }
    -- `$NAME`: the dollar sign is dropped, the name is looked up in `keys`
    local plain_var = P { (P("$") / "") * ((name_char^1) / keys) }
    -- `${NAME}`: the same lookup, with both braces dropped as well
    local braced_var = P { (P("${") / "") * ((name_char^1) / keys) * (P("}") / "") }
    -- Anything that is not a variable is copied through unchanged
    local grammar = lpeg.Cs((plain_var + braced_var + 1)^0)

    return grammar:match(tmpl)
  end
  -- Returns a shallow copy of `env` extended with the `rspamd_paths` and
  -- `rspamd_env` globals (as `paths` and `env`); the caller's table is untouched
  local function enrich_template_with_globals(env)
    local enriched = exports.shallowcopy(env)

    enriched.paths = rspamd_paths
    enriched.env = rspamd_env

    return enriched
  end
  --[[[
  -- @function lua_util.jinja_template(text, env[, skip_global_env])
  -- Expands a text template written in jinja2-like syntax. Delimiters are set
  -- by the `lupa.configure` call at module load (`{% %}`, `{= =}`, `{# #}`).
  -- @param {string} text text containing variables
  -- @param {table} env key/value pairs for replacements
  -- @param {boolean} skip_global_env don't export Rspamd superglobals (`paths`, `env`)
  -- @return {string} string containing replaced values
  -- @example
  -- lua_util.jinja_template("HELLO {= FOO =} {= BAR =}!", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
  -- "HELLO LUA WORLD!"
  --]]
  function exports.jinja_template(text, env, skip_global_env)
    local effective_env = env

    if not skip_global_env then
      effective_env = enrich_template_with_globals(env)
    end

    return lupa.expand(text, effective_env)
  end
  --[[[
  -- @function lua_util.jinja_template_file(filename, env[, skip_global_env])
  -- Expands a template file written in jinja2-like syntax (same delimiters and
  -- environment handling as `lua_util.jinja_template`)
  -- @param {string} filename name of file to expand
  -- @param {table} env key/value pairs for replacements
  -- @param {boolean} skip_global_env don't export Rspamd superglobals (`paths`, `env`)
  -- @return {string} string containing replaced values
  -- @example
  -- lua_util.jinja_template_file("/path/to/template", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
  --]]
  function exports.jinja_template_file(filename, env, skip_global_env)
    local effective_env = env

    if not skip_global_env then
      effective_env = enrich_template_with_globals(env)
    end

    return lupa.expand_file(filename, effective_env)
  end
  --[[[
  -- @function lua_util.remove_email_aliases(email_addr)
  -- Removes `+tag` aliases from the local part of an address. For `gmail.com`
  -- and `googlemail.com` dots are removed from the local part as well, and
  -- `googlemail.com` is rewritten to `gmail.com`. The address table is updated
  -- in place (`user`, `domain`, `addr` and `raw` fields).
  -- @param {table} email_addr address table with `user`, `domain` and optional `name`
  -- @return {string|nil} new local part, nil if it has not been changed
  -- @return {table|nil} removed tags as produced by `str_split(tags, '+')`; the
  -- first element is the empty string that precedes the first `+`
  --]]
  exports.remove_email_aliases = function(email_addr)
    -- Without a local part there is nothing to normalise; bailing out here
    -- also keeps nil away from the string functions used below
    if not email_addr or not email_addr.user then
      return nil
    end

    -- <user without pluses><everything starting from the first plus>
    local plus_pattern = '^([^%+][^%+]*)(%+.*)$'

    local function check_gmail_user(addr)
      -- Remove all points
      local no_dots_user = string.gsub(addr.user, '%.', '')
      local cap, pluses = string.match(no_dots_user, plus_pattern)

      if cap then
        return cap, rspamd_str_split(pluses, '+'), nil
      elseif no_dots_user ~= addr.user then
        -- No tags, but dots have been stripped
        return no_dots_user, {}, nil
      end

      return nil
    end

    local function check_address(addr)
      local cap, pluses = string.match(addr.user, plus_pattern)

      if cap then
        return cap, rspamd_str_split(pluses, '+'), nil
      end

      return nil
    end

    -- Stores the new user/domain and rebuilds the derived `addr` and `raw` fields
    local function set_addr(addr, new_user, new_domain)
      if new_user then
        addr.user = new_user
      end

      if new_domain then
        addr.domain = new_domain
      end

      if addr.domain then
        addr.addr = string.format('%s@%s', addr.user, addr.domain)
      else
        addr.addr = string.format('%s@', addr.user)
      end

      if addr.name and #addr.name > 0 then
        addr.raw = string.format('"%s" <%s>', addr.name, addr.addr)
      else
        addr.raw = string.format('<%s>', addr.addr)
      end
    end

    local function check_gmail(addr)
      local nu, tags, nd = check_gmail_user(addr)

      if nu then
        return nu, tags, nd
      end

      return nil
    end

    local function check_googlemail(addr)
      -- The domain is always canonicalised, even if the user part is untouched
      local nd = 'gmail.com'
      local nu, tags = check_gmail_user(addr)

      if nu then
        return nu, tags, nd
      end

      return nil, nil, nd
    end

    local specific_domains = {
      ['gmail.com'] = check_gmail,
      ['googlemail.com'] = check_googlemail,
    }

    local handler = email_addr.domain and specific_domains[email_addr.domain]
    local nu, tags, nd

    if handler then
      nu, tags, nd = handler(email_addr)
    else
      nu, tags, nd = check_address(email_addr)
    end

    if nu or nd then
      set_addr(email_addr, nu, nd)
      return nu, tags
    end

    return nil
  end
  -- Returns true when the request carries a `Password` header or its
  -- `User-Agent` header equals 'rspamc', false otherwise
  function exports.is_rspamc_or_controller(task)
    local user_agent = task:get_request_header('User-Agent') or ''
    local password = task:get_request_header('Password')

    if tostring(user_agent) == 'rspamc' or password then
      return true
    end

    return false
  end
  --[[[
  -- @function lua_util.unpack(table)
  -- Converts numeric table to varargs
  -- Uses `table.unpack` where it exists and falls back to the global `unpack`
  -- otherwise
  -- @param {table} table numerically indexed table to unpack
  -- @return {varargs} unpacked table elements
  --]]
  local unpack_function = table.unpack or unpack

  function exports.unpack(t)
    return unpack_function(t)
  end
  --[[[
  -- @function lua_util.flatten(table)
  -- Flattens one level of nesting: elements of the inner tables are appended
  -- to a single new table in iteration order
  -- @param {table} table table of tables
  -- @return {table} flattened table
  --]]
  function exports.flatten(t)
    local flat = {}

    for _, inner in fun.iter(t) do
      for _, item in fun.iter(inner) do
        flat[#flat + 1] = item
      end
    end

    return flat
  end
  --[[[
  -- @function lua_util.spairs(table[, order[, lim]])
  -- Like `pairs` but keys are visited in sorted order
  -- @param {table} table table containing key/value pairs
  -- @param {function} order optional comparator called as `order(table, a, b)`
  -- for keys `a` and `b`; keys are sorted with the default `<` when omitted
  -- @param {number} lim optional maximum number of pairs to return
  -- @return {function} generator function returning key/value pairs
  --]]
  -- Sorted iteration:
  -- for k,v in spairs(t) do ... end
  --
  -- Custom comparison (here: by value, ascending):
  -- for k, v in spairs(t, function(t, a, b) return t[a] < t[b] end)
  --
  -- Top X elements only: pass X as the third argument
  local function spairs(t, order, lim)
    -- Snapshot of the keys, sorted once up front
    local sorted_keys = {}
    for key in pairs(t) do
      sorted_keys[#sorted_keys + 1] = key
    end

    local cmp
    if order then
      -- The user comparator also receives the table itself
      cmp = function(a, b) return order(t, a, b) end
    end
    table.sort(sorted_keys, cmp)

    local pos = 0
    return function()
      pos = pos + 1

      if lim and pos > lim then
        return
      end

      local key = sorted_keys[pos]
      if key then
        return key, t[key]
      end
    end
  end
  296. exports.spairs = spairs
  --[[[
  -- @function lua_util.disable_module(modname, how)
  -- Disables a plugin: removes it from `rspamd_plugins_state.enabled` and
  -- registers it in the disabled list that corresponds to `how`
  -- @param {string} modname name of plugin to disable
  -- @param {string} how 'redis' (disabled_redis), 'config' (disabled_unconfigured),
  -- 'experimental' (disabled_experimental); any other value means disabled_failed
  --]]
  local function disable_module(modname, how)
    local state = rspamd_plugins_state

    if state.enabled[modname] then
      state.enabled[modname] = nil
    end

    local reason = 'disabled_failed'
    if how == 'redis' then
      reason = 'disabled_redis'
    elseif how == 'config' then
      reason = 'disabled_unconfigured'
    elseif how == 'experimental' then
      reason = 'disabled_experimental'
    end

    state[reason][modname] = {}
  end
  317. exports.disable_module = disable_module
  --[[[
  -- @function lua_util.check_experimental(modname)
  -- Checks whether experimental plugins are enabled in the configuration and
  -- disables the plugin (as 'experimental') if they are not
  -- @param {string} modname name of plugin to check
  -- @return {boolean} true if plugin should be enabled, false otherwise
  --]]
  local function check_experimental(modname)
    if not rspamd_config:experimental_enabled() then
      disable_module(modname, 'experimental')
      return false
    end

    return true
  end
  332. exports.check_experimental = check_experimental
  --[[[
  -- @function lua_util.list_to_hash(list)
  -- Converts numerically-indexed table to table indexed by values
  -- A table without a first array element is returned as is (the very same
  -- table); values that are neither tables nor strings yield nothing
  -- @param {table} list numerically-indexed table or string, which is treated as a one-element list
  -- @return {table} table indexed by values
  -- @example
  -- local h = lua_util.list_to_hash({"a", "b"})
  -- -- h contains {a = true, b = true}
  --]]
  local function list_to_hash(list)
    local kind = type(list)

    if kind == 'string' then
      return { [list] = true }
    end

    if kind == 'table' then
      if not list[1] then
        -- Not an array: assume it is a hash already
        return list
      end

      local set = {}
      for _, item in ipairs(list) do
        set[item] = true
      end
      return set
    end
  end
  359. exports.list_to_hash = list_to_hash
  --[[[
  -- @function lua_util.parse_time_interval(str)
  -- Parses human readable time interval
  -- Accepts 's' for seconds, 'm' for minutes, 'h' for hours, 'd' for days,
  -- 'w' for weeks, 'y' for years (365 days)
  -- @param {string} str input string
  -- @return {number|nil} parsed interval as seconds (might be fractional)
  --]]
  local function parse_time_interval(str)
    -- Seconds per unit suffix
    local seconds_in = {
      s = 1,
      m = 60,
      h = 3600,
      d = 86400,
      w = 86400 * 7,
      y = 365 * 86400,
    }

    local function parse_time_suffix(s)
      return seconds_in[s]
    end

    local digit = lpeg.R("09")
    local sign = lpeg.S("+-")
    local integer = (sign ^ -1) * (digit ^ 1)
    local fractional = lpeg.P(".") * (digit ^ 1)
    -- Either `[sign]digits[.digits]` or `sign.digits`
    local number = (integer * (fractional ^ -1)) + (sign * fractional)

    -- Fold the parsed number and the optional unit multiplier into a product
    local time = lpeg.Cf(
        lpeg.Cc(1) *
        (number / tonumber) *
        ((lpeg.S("smhdwy") / parse_time_suffix) ^ -1),
        function(acc, val) return acc * val end)

    return (lpeg.match(time, str))
  end
  403. exports.parse_time_interval = parse_time_interval
  --[[[
  -- @function lua_util.dehumanize_number(str)
  -- Parses human readable number
  -- Accepts 'k' for thousands, 'm' for millions, 'g' for billions, 'b' suffix for 1024 multiplier,
  -- e.g. `10mb` equal to `10 * 1024 * 1024`
  -- @param {string} str input string
  -- @return {number|nil} parsed number
  --]]
  local function dehumanize_number(str)
    -- Multiplier for each supported suffix
    local multipliers = {
      k = 1000,
      m = 1000000,
      g = 1e9,
      kb = 1024,
      mb = 1024 * 1024,
      -- Used to be 1024 * 1024, i.e. the very same value as 'mb'
      gb = 1024 * 1024 * 1024,
    }

    local function parse_suffix(s)
      return multipliers[s]
    end

    local digit = lpeg.R("09")
    local parser = {}
    parser.integer =
    (lpeg.S("+-") ^ -1) *
        (digit ^ 1)
    parser.fractional =
    (lpeg.P(".")) *
        (digit ^ 1)
    -- Either `[sign]digits[.digits]` or `sign.digits`
    parser.number =
    (parser.integer *
        (parser.fractional ^ -1)) +
        (lpeg.S("+-") * parser.fractional)
    -- Fold the parsed number and the optional suffix multiplier into a product
    parser.humanized_number = lpeg.Cf(lpeg.Cc(1) *
        (parser.number / tonumber) *
        (((lpeg.S("kmg") * (lpeg.P("b") ^ -1)) / parse_suffix) ^ -1),
        function(acc, val) return acc * val end)

    local t = lpeg.match(parser.humanized_number, str)

    return t
  end
  447. exports.dehumanize_number = dehumanize_number
  --[[[
  -- @function lua_util.table_cmp(t1, t2)
  -- Compare two values deeply: tables are equal when they hold equal values
  -- under equal keys (keys that are tables are matched by content as well),
  -- everything else is compared with `==`
  -- @param {any} t1 first value
  -- @param {any} t2 second value
  -- @return {boolean} true if both values are deeply equal
  --]]
  local function table_cmp(table1, table2)
    -- Left-hand tables already being compared -> the table they are paired
    -- with; this stops endless recursion on self-referencing tables
    local visited = {}

    local function recurse(lhs, rhs)
      if type(lhs) ~= type(rhs) then return false end
      if type(lhs) ~= "table" then return lhs == rhs end
      if visited[lhs] then return visited[lhs] == rhs end
      visited[lhs] = rhs

      -- Keys of rhs that have not been matched yet
      local pending = {}
      local rhs_table_keys = {}
      for rk, _ in pairs(rhs) do
        if type(rk) == "table" then
          rhs_table_keys[#rhs_table_keys + 1] = rk
        end
        pending[rk] = true
      end

      for lk, lv in pairs(lhs) do
        local rv = rhs[lk]

        if type(lk) == "table" then
          -- A table key cannot be looked up directly: search for a key with
          -- the same content whose value matches too
          local matched = false
          for idx, rk in ipairs(rhs_table_keys) do
            if table_cmp(lk, rk) and recurse(lv, rhs[rk]) then
              table.remove(rhs_table_keys, idx)
              pending[rk] = nil
              matched = true
              break
            end
          end
          if not matched then return false end
        else
          -- lhs has a key which rhs doesn't have
          if rv == nil then return false end
          pending[lk] = nil
          if not recurse(lv, rv) then return false end
        end
      end

      -- Anything left over is a key present in rhs only
      if next(pending) then return false end
      return true
    end

    return recurse(table1, table2)
  end
  494. exports.table_cmp = table_cmp
  --[[[
  -- @function lua_util.fold_header(task, name, value, stop_chars)
  -- Performs header folding via `rspamd_util.fold_header`. Newlines are "lf"
  -- for tasks having the "milter" flag, otherwise the task's own newline type
  -- is used
  -- @param {task} task task object
  -- @param {string} name header name
  -- @param {string} value header value
  -- @param {string} stop_chars passed through to `rspamd_util.fold_header`
  -- @return result of `rspamd_util.fold_header`
  --]]
  function exports.fold_header(task, name, value, stop_chars)
    local newlines = task:has_flag("milter") and "lf" or task:get_newlines_type()

    return rspamd_util.fold_header(name, value, newlines, stop_chars)
  end
  --[[[
  -- @function lua_util.override_defaults(defaults, override)
  -- Overrides values from defaults with override. Nested tables present on
  -- both sides are merged recursively; the result is a new table and neither
  -- argument is modified
  -- @param {table} defaults default values
  -- @param {table} override values taking precedence over the defaults
  -- @return {table} merged table (or one of the arguments if the other one is not a table)
  --]]
  local function override_defaults(def, override)
    -- Corner cases: nothing to merge unless both sides are tables
    if type(override) ~= 'table' then
      return def
    end
    if type(def) ~= 'table' then
      return override
    end

    local merged = {}

    for key, value in pairs(override) do
      if type(value) == 'table' and type(def[key]) == 'table' then
        -- Recursively override elements
        merged[key] = override_defaults(def[key], value)
      else
        merged[key] = value
      end
    end

    -- Fill in the defaults that have not been overridden (an explicit
    -- `false` in override counts as overridden)
    for key, value in pairs(def) do
      if merged[key] == nil then
        merged[key] = value
      end
    end

    return merged
  end
  540. exports.override_defaults = override_defaults
  --[[[
  -- @function lua_util.filter_specific_urls(urls, params)
  -- Apply heuristic in extracting of urls from `urls` table, this function
  -- tries its best to extract specific number of urls from a task based on
  -- their characteristics
  -- params: {
  --   task - if needed to save in the cache
  --   limit <int> (default = 9999) maximum number of urls to return
  --   esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain)
  --     works only if number of unique eSLD less than `limit`
  --   need_emails <bool> (default = false) here it only takes part in the cache key
  --   need_images <bool> (default = false) keep image urls (with the lowest priority)
  --   filter <callback> (default = nil) predicate applied to `urls` beforehand
  --   prefix <string> cache prefix (default = nil), used as the cache key when set
  --   ignore_ip <bool> (default = false) skip urls having the `numeric` flag
  --   ignore_redirected <bool> (default = false) use the redirect target instead
  --     of processing both the original url and the target
  --   no_cache <bool> (default = false) neither read nor write the task cache
  -- }
  -- @param {table} urls list of url objects
  -- @param {table} params parameters as described above
  -- @return {table} list of selected urls
  --]]
  exports.filter_specific_urls = function (urls, params)
    -- Documented defaults; kept in locals so that direct callers may omit
    -- them and so that the caller's table is not modified
    local url_limit = params.limit or 9999
    local esld_limit = params.esld_limit or 9999
    local use_cache = params.task and not params.no_cache
    local cache_key

    if use_cache then
      if params.prefix then
        cache_key = params.prefix
      else
        cache_key = string.format('sp_urls_%d%s%s', url_limit,
            tostring(params.need_emails or false),
            tostring(params.need_images or false))
      end

      local cached = params.task:cache_get(cache_key)

      if cached then
        return cached
      end
    end

    if not urls then return {} end

    if params.filter then urls = fun.totable(fun.filter(params.filter, urls)) end

    -- Candidates grouped by eSLD and by tld; each entry is
    -- {string form of url, url object, priority}
    local tlds = {}
    local eslds = {}
    -- Selected urls keyed by their string form (removes duplicates)
    local res = {}
    local nres = 0

    local function insert_url(str, u)
      if not res[str] then
        res[str] = u
        nres = nres + 1

        return true
      end

      return false
    end

    -- Converts the selection to a list, stores it in the cache if requested
    local function finish()
      local selected = exports.values(res)

      if use_cache then
        params.task:cache_set(cache_key, selected)
      end

      return selected
    end

    local function process_single_url(u, default_priority)
      local priority = default_priority or 1 -- Normal priority
      local flags = u:get_flags()

      if params.ignore_ip and flags.numeric then
        return
      end

      if flags.redirected then
        local redir = u:get_redirected() -- get the real url

        if params.ignore_redirected then
          -- Replace `u` with redir
          u = redir
          priority = 2
        else
          -- Process both redirected url and the original one
          process_single_url(redir, 2)
        end
      end

      if flags.image then
        if not params.need_images then
          -- Ignore url
          return
        else
          -- Penalise images in urls
          priority = 0
        end
      end

      local esld = u:get_tld()
      local str_hash = tostring(u)

      if esld then
        -- Special cases
        if (u:get_protocol() ~= 'mailto') and (not flags.html_displayed) then
          if flags.obscured then
            priority = 3
          else
            if (flags.has_user or flags.has_port) then
              priority = 2
            elseif (flags.subject or flags.phished) then
              priority = 2
            end
          end
        elseif flags.html_displayed then
          priority = 0
        end

        if not eslds[esld] then
          eslds[esld] = {{str_hash, u, priority}}
        else
          if #eslds[esld] < esld_limit then
            table.insert(eslds[esld], {str_hash, u, priority})
          end
        end

        -- eSLD - 1 part => tld
        local parts = rspamd_str_split(esld, '.')
        local tld = table.concat(fun.totable(fun.tail(parts)), '.')

        if not tlds[tld] then
          tlds[tld] = {{str_hash, u, priority}}
        else
          table.insert(tlds[tld], {str_hash, u, priority})
        end
      end
    end

    for _,u in ipairs(urls) do
      process_single_url(u)
    end

    local limit = url_limit - nres
    if limit < 0 then limit = 0 end

    if limit == 0 then
      return finish()
    end

    local function by_priority(tr1, tr2)
      return tr1[3] < tr2[3]
    end

    -- Sorts every group so that its best url is the last one, then sorts the
    -- groups themselves. The groups are sorted once up front instead of
    -- inside the comparator, which ran on every single comparison
    local function sort_stuff(tbl)
      for _,group in ipairs(tbl) do
        table.sort(group, by_priority)
      end

      table.sort(tbl, function(e1, e2)
        local top1, top2 = e1[#e1][3], e2[#e2][3]

        if top1 ~= top2 then
          -- Sort by priority so max priority is at the beginning
          return top1 > top2
        end

        -- Prefer less urls to more urls per esld
        return #e1 < #e2
      end)

      return tbl
    end

    eslds = sort_stuff(exports.values(eslds))

    if #eslds <= limit then
      -- Few eSLDs: take urls from each of them in turn until the limit is hit
      repeat
        local item_found = false

        for _,lurls in ipairs(eslds) do
          if #lurls > 0 then
            local last = table.remove(lurls)
            insert_url(last[1], last[2])
            limit = limit - 1
            item_found = true

            -- Stop in the middle of a pass as well, otherwise more than
            -- `limit` urls could be returned
            if limit <= 0 then break end
          end
        end
      until limit <= 0 or not item_found

      return finish()
    end

    tlds = sort_stuff(exports.values(tlds))

    -- More eSLDs than the limit: take urls tld by tld
    while limit > 0 do
      local item_found = false

      for _,lurls in ipairs(tlds) do
        if #lurls > 0 then
          local last = table.remove(lurls)
          insert_url(last[1], last[2])
          limit = limit - 1
          item_found = true
        end

        if limit == 0 then break end
      end

      -- Defensive: never spin if all the lists have been drained
      if not item_found then break end
    end

    return finish()
  end
  --[[[
  -- @function lua_util.extract_specific_urls(params)
  -- params: {
  --   task
  --   limit <int> (default = 9999)
  --   esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain)
  --     works only if number of unique eSLD less than `limit`
  --   need_emails <bool> (default = false)
  --   filter <callback> (default = nil)
  --   prefix <string> cache prefix (default = nil)
  --   ignore_ip <bool> (default = false)
  --   ignore_redirected <bool> (default = false)
  --   need_images <bool> (default = false)
  --   no_cache <bool> (default = false)
  -- }
  -- Apply heuristic in extracting of urls from task, this function
  -- tries its best to extract specific number of urls from a task based on
  -- their characteristics
  -- Missing defaults are written into the passed `params` table. The old
  -- positional form `(task, limit, need_emails, filter, prefix)` is still
  -- accepted but deprecated.
  --]]
  function exports.extract_specific_urls(params_or_task, lim, need_emails, filter, prefix)
    local defaults = {
      limit = 9999,
      esld_limit = 9999,
      need_emails = false,
      need_images = false,
      ignore_ip = false,
      ignore_redirected = false,
      no_cache = false,
    }

    local params

    if type(params_or_task) == 'table' and lim == nil then
      params = params_or_task
    else
      -- Deprecated call
      params = {
        task = params_or_task,
        limit = lim,
        need_emails = need_emails,
        filter = filter,
        prefix = prefix
      }
    end

    -- Fill in whatever the caller has left unset
    for name, value in pairs(defaults) do
      if params[name] == nil then
        params[name] = value
      end
    end

    local urls = params.task:get_urls(params.need_emails)

    return exports.filter_specific_urls(urls, params)
  end
  --[[[
  -- @function lua_util.deepcopy(table)
  -- Performs deep copy of the table, including keys and metatables.
  -- Self-referencing structures are supported: a table reachable more than
  -- once (e.g. a metatable whose `__index` points to itself) is copied once
  -- and the copy is shared. Non-table values are returned as is.
  -- @param {any} table value to copy
  -- @return {any} deep copy of the value
  --]]
  local function deepcopy(orig)
    -- original table -> its copy; breaks cycles and preserves sharing
    local seen = {}

    local function copy_value(value)
      if type(value) ~= 'table' then
        -- number, string, boolean, etc
        return value
      end

      local known = seen[value]
      if known then
        return known
      end

      local copy = {}
      -- Register before descending so that back references resolve to `copy`
      seen[value] = copy

      for k, v in next, value, nil do
        copy[copy_value(k)] = copy_value(v)
      end

      local mt = getmetatable(value)
      -- getmetatable may return a non-table when `__metatable` is set
      if type(mt) == 'table' then
        setmetatable(copy, copy_value(mt))
      end

      return copy
    end

    return copy_value(orig)
  end
  793. exports.deepcopy = deepcopy
  --[[[
  -- @function lua_util.shallowcopy(tbl)
  -- Performs shallow (and fast) copy of a table: keys and values are copied
  -- by reference and the metatable is not carried over. Any other Lua type is
  -- returned as is.
  -- @param {any} tbl value to copy
  -- @return {any} shallow copy
  --]]
  function exports.shallowcopy(orig)
    if type(orig) ~= 'table' then
      return orig
    end

    local copy = {}
    for key, value in pairs(orig) do
      copy[key] = value
    end

    return copy
  end
  -- Debugging support
  -- When set, `debugm` logs for every module
  local unconditional_debug = false
  -- Module (or alias) name -> true for modules with debug logging enabled
  local debug_modules = {}
  -- Alias name -> name of the module it stands for
  local debug_aliases = {}
  local log_level = 384 -- debug + forced (1 << 7 | 1 << 8)

  --[[[
  -- @function lua_util.init_debug_logging(config)
  -- Configures debug logging from the `logging` section of the config:
  -- `level = "debug"` enables debug for all modules, otherwise the modules
  -- listed in `debug_modules` (and the aliases registered for them) are enabled
  -- @param {rspamd_config} config configuration object
  --]]
  exports.init_debug_logging = function(config)
    local logger = require "rspamd_logger"
    -- Fill debug modules from the config
    local logging = config:get_all_opt('logging')

    if not logging then
      return
    end

    if logging.level == 'debug' then
      unconditional_debug = true
    end

    if unconditional_debug then
      -- Everything is logged anyway, per-module settings are irrelevant
      return
    end

    if logging.debug_modules then
      for _,m in ipairs(logging.debug_modules) do
        debug_modules[m] = true
        logger.infox(config, 'enable debug for Lua module %s', m)
      end
    end

    -- `debug_aliases` is keyed by alias name, so `#debug_aliases` is always 0
    -- and must not be used as an emptiness check; iterating an empty table
    -- is a no-op anyway
    for alias,mod in pairs(debug_aliases) do
      if debug_modules[mod] then
        debug_modules[alias] = true
        logger.infox(config, 'enable debug for Lua module %s (%s aliased)',
            alias, mod)
      end
    end
  end
  846. exports.enable_debug_logging = function()
  847. unconditional_debug = true
  848. end
  849. exports.disable_debug_logging = function()
  850. unconditional_debug = false
  851. end
  852. --[[[
  853. -- @function lua_util.debugm(module, [log_object], format, ...)
  854. -- Performs fast debug log for a specific module
  855. --]]
  856. exports.debugm = function(mod, obj_or_fmt, fmt_or_something, ...)
  857. local logger = require "rspamd_logger"
  858. if unconditional_debug or debug_modules[mod] then
  859. if type(obj_or_fmt) == 'string' then
  860. logger.logx(log_level, mod, '', 2, obj_or_fmt, fmt_or_something, ...)
  861. else
  862. logger.logx(log_level, mod, obj_or_fmt, 2, fmt_or_something, ...)
  863. end
  864. end
  865. end
  866. --[[[
  867. -- @function lua_util.add_debug_alias(mod, alias)
  868. -- Add debugging alias so logging to `alias` will be treated as logging to `mod`
  869. --]]
  870. exports.add_debug_alias = function(mod, alias)
  871. local logger = require "rspamd_logger"
  872. debug_aliases[alias] = mod
  873. if debug_modules[mod] then
  874. debug_modules[alias] = true
  875. logger.infox(rspamd_config, 'enable debug for Lua module %s (%s aliased)',
  876. alias, mod)
  877. end
  878. end
  879. ---[[[
  880. -- @function lua_util.get_task_verdict(task)
  881. -- Returns verdict for a task + score if certain, must be called from idempotent filters only
  882. -- Returns string:
  883. -- * `spam`: if message have over reject threshold and has more than one positive rule
  884. -- * `junk`: if a message has between score between [add_header/rewrite subject] to reject thresholds and has more than two positive rules
  885. -- * `passthrough`: if a message has been passed through some short-circuit rule
  886. -- * `ham`: if a message has overall score below junk level **and** more than three negative rule, or negative total score
  887. -- * `uncertain`: all other cases
  888. --]]
  889. exports.get_task_verdict = function(task)
  890. local result = task:get_metric_result()
  891. if result then
  892. if result.passthrough then
  893. return 'passthrough',nil
  894. end
  895. local score = result.score
  896. local action = result.action
  897. if action == 'reject' and result.npositive > 1 then
  898. return 'spam',score
  899. elseif action == 'no action' then
  900. if score < 0 or result.nnegative > 3 then
  901. return 'ham',score
  902. end
  903. else
  904. -- All colors of junk
  905. if action == 'add header' or action == 'rewrite subject' then
  906. if result.npositive > 2 then
  907. return 'junk',score
  908. end
  909. end
  910. end
  911. return 'uncertain',score
  912. end
  913. end
  914. ---[[[
  915. -- @function lua_util.maybe_obfuscate_string(subject, settings, prefix)
  916. -- Obfuscate string if enabled in settings. Also checks utf8 validity - if
  917. -- string is not valid utf8 then '???' is returned. Empty string returned as is.
  918. -- Supported settings:
  919. -- * <prefix>_privacy = false - subject privacy is off
  920. -- * <prefix>_privacy_alg = 'blake2' - default hash-algorithm to obfuscate subject
  921. -- * <prefix>_privacy_prefix = 'obf' - prefix to show it's obfuscated
  922. -- * <prefix>_privacy_length = 16 - cut the length of the hash; if 0 or fasle full hash is returned
  923. -- @return obfuscated or validated subject
  924. --]]
  925. exports.maybe_obfuscate_string = function(subject, settings, prefix)
  926. local hash = require 'rspamd_cryptobox_hash'
  927. if not subject or subject == '' then
  928. return subject
  929. elseif not rspamd_util.is_valid_utf8(subject) then
  930. subject = '???'
  931. elseif settings[prefix .. '_privacy'] then
  932. local hash_alg = settings[prefix .. '_privacy_alg'] or 'blake2'
  933. local subject_hash = hash.create_specific(hash_alg, subject)
  934. local strip_len = settings[prefix .. '_privacy_length']
  935. if strip_len and strip_len > 0 then
  936. subject = subject_hash:hex():sub(1, strip_len)
  937. else
  938. subject = subject_hash:hex()
  939. end
  940. local privacy_prefix = settings[prefix .. '_privacy_prefix']
  941. if privacy_prefix and #privacy_prefix > 0 then
  942. subject = privacy_prefix .. ':' .. subject
  943. end
  944. end
  945. return subject
  946. end
  947. ---[[[
  948. -- @function lua_util.callback_from_string(str)
  949. -- Converts a string like `return function(...) end` to lua function and return true and this function
  950. -- or returns false + error message
  951. -- @return status code and function object or an error message
  952. --]]]
  953. exports.callback_from_string = function(s)
  954. local loadstring = loadstring or load
  955. if not s or #s == 0 then
  956. return false,'invalid or empty string'
  957. end
  958. s = exports.rspamd_str_trim(s)
  959. local inp
  960. if s:match('^return%s*function') then
  961. -- 'return function', can be evaluated directly
  962. inp = s
  963. elseif s:match('^function%s*%(') then
  964. inp = 'return ' .. s
  965. else
  966. -- Just a plain sequence
  967. inp = 'return function(...)\n' .. s .. '; end'
  968. end
  969. local ret, res_or_err = pcall(loadstring(inp))
  970. if not ret or type(res_or_err) ~= 'function' then
  971. return false,res_or_err
  972. end
  973. return ret,res_or_err
  974. end
  975. ---[[[
  976. -- @function lua_util.keys(t)
  977. -- Returns all keys from a specific table
  978. -- @param {table} t input table (or iterator triplet)
  979. -- @return array of keys
  980. --]]]
  981. exports.keys = function(gen, param, state)
  982. local keys = {}
  983. local i = 1
  984. if param then
  985. for k,_ in fun.iter(gen, param, state) do
  986. rawset(keys, i, k)
  987. i = i + 1
  988. end
  989. else
  990. for k,_ in pairs(gen) do
  991. rawset(keys, i, k)
  992. i = i + 1
  993. end
  994. end
  995. return keys
  996. end
  997. ---[[[
  998. -- @function lua_util.values(t)
  999. -- Returns all values from a specific table
  1000. -- @param {table} t input table
  1001. -- @return array of values
  1002. --]]]
  1003. exports.values = function(gen, param, state)
  1004. local values = {}
  1005. local i = 1
  1006. if param then
  1007. for _,v in fun.iter(gen, param, state) do
  1008. rawset(values, i, v)
  1009. i = i + 1
  1010. end
  1011. else
  1012. for _,v in pairs(gen) do
  1013. rawset(values, i, v)
  1014. i = i + 1
  1015. end
  1016. end
  1017. return values
  1018. end
  1019. ---[[[
  1020. -- @function lua_util.distance_sorted(t1, t2)
  1021. -- Returns distance between two sorted tables t1 and t2
  1022. -- @param {table} t1 input table
  1023. -- @param {table} t2 input table
  1024. -- @return distance between `t1` and `t2`
  1025. --]]]
  1026. exports.distance_sorted = function(t1, t2)
  1027. local ncomp = #t1
  1028. local ndiff = 0
  1029. local i,j = 1,1
  1030. if ncomp < #t2 then
  1031. ncomp = #t2
  1032. end
  1033. for _=1,ncomp do
  1034. if j > #t2 then
  1035. ndiff = ndiff + ncomp - #t2
  1036. if i > j then
  1037. ndiff = ndiff - (i - j)
  1038. end
  1039. break
  1040. elseif i > #t1 then
  1041. ndiff = ndiff + ncomp - #t1
  1042. if j > i then
  1043. ndiff = ndiff - (j - i)
  1044. end
  1045. break
  1046. end
  1047. if t1[i] == t2[j] then
  1048. i = i + 1
  1049. j = j + 1
  1050. elseif t1[i] < t2[j] then
  1051. i = i + 1
  1052. ndiff = ndiff + 1
  1053. else
  1054. j = j + 1
  1055. ndiff = ndiff + 1
  1056. end
  1057. end
  1058. return ndiff
  1059. end
  1060. ---[[[
  1061. -- @function lua_util.table_digest(t)
  1062. -- Returns hash of all values if t[1] is string or all keys/values otherwise
  1063. -- @param {table} t input array or map
  1064. -- @return {string} base32 representation of blake2b hash of all strings
  1065. --]]]
  1066. local function table_digest(t)
  1067. local cr = require "rspamd_cryptobox_hash"
  1068. local h = cr.create()
  1069. if t[1] then
  1070. for _,e in ipairs(t) do
  1071. if type(e) == 'table' then
  1072. h:update(table_digest(e))
  1073. else
  1074. h:update(tostring(e))
  1075. end
  1076. end
  1077. else
  1078. for k,v in pairs(t) do
  1079. h:update(tostring(k))
  1080. if type(v) == 'string' then
  1081. h:update(v)
  1082. elseif type(v) == 'table' then
  1083. h:update(table_digest(v))
  1084. end
  1085. end
  1086. end
  1087. return h:base32()
  1088. end
  1089. exports.table_digest = table_digest
  1090. ---[[[
  1091. -- @function lua_util.toboolean(v)
  1092. -- Converts a string or a number to boolean
  1093. -- @param {string|number} v
  1094. -- @return {boolean} v converted to boolean
  1095. --]]]
  1096. exports.toboolean = function(v)
  1097. local true_t = {
  1098. ['1'] = true,
  1099. ['true'] = true,
  1100. ['TRUE'] = true,
  1101. ['True'] = true,
  1102. };
  1103. local false_t = {
  1104. ['0'] = false,
  1105. ['false'] = false,
  1106. ['FALSE'] = false,
  1107. ['False'] = false,
  1108. };
  1109. if type(v) == 'string' then
  1110. if true_t[v] == true then
  1111. return true;
  1112. elseif false_t[v] == false then
  1113. return false;
  1114. else
  1115. return false, string.format( 'cannot convert %q to boolean', v);
  1116. end
  1117. elseif type(v) == 'number' then
  1118. return (not (v == 0))
  1119. else
  1120. return false, string.format( 'cannot convert %q to boolean', v);
  1121. end
  1122. end
  1123. return exports