--[[
Copyright (c) 2023, Vsevolod Stakhov <vsevolod@rspamd.com>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--
-
--[[[
-- @module lua_util
-- This module contains utility functions for working with Lua and/or Rspamd
--]]
-
- local exports = {}
- local lpeg = require 'lpeg'
- local rspamd_util = require "rspamd_util"
- local fun = require "fun"
- local lupa = require "lupa"
-
-- Compiled split grammars, cached by the separator they were built for
local split_grammar = {}
-- Grammar used when no separator is given; built on first use
local spaces_split_grammar
-- Matches a single ASCII whitespace character
local space = lpeg.S(' \t\n\v\f\r')
local nospace = 1 - space
-- Skips leading whitespace, then captures up to the last non-whitespace char
local ptrim = space ^ 0 * lpeg.C((space ^ 0 * nospace ^ 1) ^ 0)
local match = lpeg.match

-- Template delimiters used by the jinja helpers below: `{% %}`, `{= =}`
-- and `{# #}`
lupa.configure('{%', '%}', '{=', '=}', '{#', '#}', {
  keep_trailing_newline = true,
  autoescape = false,
})
-
-- Template filter `pbkdf`: passes the filtered value through
-- rspamd_cryptobox.pbkdf and returns whatever that call returns
lupa.filters.pbkdf = function(s)
  -- The crypto module is resolved at call time, not at module load
  local cryptobox = require "rspamd_cryptobox"
  return cryptobox.pbkdf(s)
end
-
--[[[
-- @function lua_util.str_split(text, delimiter)
-- Splits text into a numeric table by delimiter. A string delimiter is
-- treated as a set of separator characters; any other value is assumed to be
-- an lpeg pattern. Without a delimiter the text is split on whitespace.
-- Compiled grammars are cached per delimiter.
-- @param {string} text delimited text
-- @param {string} delimiter the delimiter
-- @return {table} numeric table containing string parts
--]]
local function rspamd_str_split(s, sep)
  -- Grammar collecting the (possibly empty) pieces between separators
  local function build_grammar(sep_patt)
    local piece = lpeg.C((1 - sep_patt) ^ 0)
    return lpeg.Ct(piece * (sep_patt * piece) ^ 0)
  end

  if not sep then
    if not spaces_split_grammar then
      spaces_split_grammar = build_grammar(space)
    end

    return spaces_split_grammar:match(s)
  end

  local grammar = split_grammar[sep]
  if not grammar then
    local sep_patt = sep -- Assume lpeg object
    if type(sep) == 'string' then
      sep_patt = lpeg.S(sep) -- Assume set
    end
    grammar = build_grammar(sep_patt)
    split_grammar[sep] = grammar
  end

  return grammar:match(s)
end

exports.rspamd_str_split = rspamd_str_split
exports.str_split = rspamd_str_split
-
--[[[
-- @function lua_util.str_trim(text)
-- Returns a string with no trailing and leading spaces
-- @param {string} text input text
-- @return {string} string with no trailing and leading spaces
--]]
local function rspamd_str_trim(s)
  local trimmed = match(ptrim, s)
  return trimmed
end

exports.rspamd_str_trim = rspamd_str_trim
exports.str_trim = rspamd_str_trim
-
--[[[
-- @function lua_util.str_startswith(text, prefix)
-- Checks whether the leading `#prefix` characters of text equal prefix
-- @param {string} text
-- @param {string} prefix
-- @return {boolean} true if text starts with the specified prefix, false otherwise
--]]
exports.str_startswith = function(s, prefix)
  local head = s:sub(1, prefix:len())
  return head == prefix
end
-
--[[[
-- @function lua_util.str_endswith(text, suffix)
-- Performs a plain (non-pattern) search for suffix, starting `#suffix`
-- characters before the end of text
-- @param {string} text
-- @param {string} suffix
-- @return {boolean} true if text ends with the specified suffix, false otherwise
--]]
exports.str_endswith = function(s, suffix)
  local start = -suffix:len()
  local found_at = s:find(suffix, start, true)
  return found_at ~= nil
end
-
--[[[
-- @function lua_util.round(number, decimalPlaces)
-- Round number to fixed number of decimal points; halves are rounded away
-- from zero
-- @param {number} number number to round
-- @param {number} decimalPlaces number of decimal points (0 when omitted)
-- @return {number} rounded number
--]]

-- modified version from Robert Jay Gould http://lua-users.org/wiki/SimpleRound
exports.round = function(num, numDecimalPlaces)
  local scale = 10 ^ (numDecimalPlaces or 0)
  local scaled = num * scale

  if num >= 0 then
    return math.floor(scaled + 0.5) / scale
  end

  return math.ceil(scaled - 0.5) / scale
end
-
--[[[
-- @function lua_util.template(text, replacements)
-- Replaces values in a text template
-- Variable names can contain letters, numbers and underscores, are prefixed
-- with `$` and may or may not use curly braces.
-- @param {string} text text containing variables
-- @param {table} replacements key/value pairs for replacements
-- @return {string} string containing replaced values
-- @example
-- local goop = lua_util.template("HELLO $FOO ${BAR}!", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
-- -- goop contains "HELLO LUA WORLD!"
--]]
exports.template = function(tmpl, keys)
  -- A literal that is matched and then replaced by the empty string
  local function dropped(literal)
    return lpeg.P(literal) / ""
  end

  local name_char = lpeg.R("az") + lpeg.R("AZ") + lpeg.R("09") + "_"
  -- The matched name is looked up in `keys`
  local name = (name_char ^ 1) / keys
  local plain_var = dropped("$") * name
  local braced_var = dropped("${") * name * dropped("}")

  -- Anything that is not a variable is copied through one char at a time
  local grammar = lpeg.Cs((plain_var + braced_var + 1) ^ 0)

  return lpeg.match(grammar, tmpl)
end
-
-- Returns a shallow copy of `env` with the `paths` and `env` fields set from
-- the `rspamd_paths` and `rspamd_env` globals; the caller's table is untouched
local function enrich_template_with_globals(env)
  local enriched = exports.shallowcopy(env)

  enriched.env = rspamd_env
  enriched.paths = rspamd_paths

  return enriched
end
--[[[
-- @function lua_util.jinja_template(text, env[, skip_global_env])
-- Expands a text template with lupa (jinja2-like syntax). This module
-- configures lupa with `{% %}`, `{= =}` and `{# #}` delimiters.
-- @param {string} text text containing variables
-- @param {table} env key/value pairs for replacements
-- @param {boolean} skip_global_env don't export Rspamd superglobals
-- @return {string} string containing replaced values
-- @example
-- lua_util.jinja_template("HELLO {= FOO =} {= BAR =}!", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
-- "HELLO LUA WORLD!"
--]]
exports.jinja_template = function(text, env, skip_global_env)
  local expand_env = env
  if not skip_global_env then
    expand_env = enrich_template_with_globals(env)
  end

  return lupa.expand(text, expand_env)
end
-
--[[[
-- @function lua_util.jinja_template_file(filename, env[, skip_global_env])
-- Expands a template file with lupa (jinja2-like syntax). This module
-- configures lupa with `{% %}`, `{= =}` and `{# #}` delimiters.
-- @param {string} filename name of file to expand
-- @param {table} env key/value pairs for replacements
-- @param {boolean} skip_global_env don't export Rspamd superglobals
-- @return {string} string containing replaced values
-- @example
-- lua_util.jinja_template_file("/path/to/template", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
--]]
exports.jinja_template_file = function(filename, env, skip_global_env)
  local expand_env = env
  if not skip_global_env then
    expand_env = enrich_template_with_globals(env)
  end

  return lupa.expand_file(filename, expand_env)
end
-
-- Strips `+tag` aliases from the user part of an address table, in place.
-- For gmail.com and googlemail.com dots are also removed from the user part,
-- and googlemail.com is rewritten to gmail.com.
-- When something changes, the `user`, `domain`, `addr` and `raw` fields of
-- `email_addr` are updated.
-- @param {table} email_addr address table (reads `user`, `domain`, `name`)
-- @return new user part and the table of tags when the address was rewritten
-- (the user part is nil when only the domain changed); nil otherwise
exports.remove_email_aliases = function(email_addr)
  -- user part = non-empty text without `+`, followed by `+...`
  local plus_pattern = '^([^%+][^%+]*)(%+.*)$'

  -- Returns the base user and the split tags, or nil if there is no `+` alias
  local function split_plus_tags(user)
    local base, pluses = string.match(user, plus_pattern)
    if base then
      return base, rspamd_str_split(pluses, '+')
    end

    return nil
  end

  -- gmail rules: dots are insignificant, `+tags` are aliases
  local function check_gmail_user(addr)
    -- Remove all points (extra parentheses drop gsub's replacement count)
    local no_dots_user = (string.gsub(addr.user, '%.', ''))
    local base, tags = split_plus_tags(no_dots_user)

    if base then
      return base, tags
    end
    if no_dots_user ~= addr.user then
      return no_dots_user, {}
    end

    return nil
  end

  -- googlemail.com is always normalised to gmail.com, even when the user
  -- part needs no change
  local function check_googlemail(addr)
    local new_user, tags = check_gmail_user(addr)
    return new_user, tags, 'gmail.com'
  end

  -- Generic rule: only `+tags` are stripped
  local function check_address(addr)
    if not addr.user then
      return nil
    end

    return split_plus_tags(addr.user)
  end

  -- Applies the new user/domain and rebuilds the derived fields
  local function set_addr(addr, new_user, new_domain)
    if new_user then
      addr.user = new_user
    end
    if new_domain then
      addr.domain = new_domain
    end

    if addr.domain then
      addr.addr = string.format('%s@%s', addr.user, addr.domain)
    else
      addr.addr = string.format('%s@', addr.user)
    end

    if addr.name and #addr.name > 0 then
      addr.raw = string.format('"%s" <%s>', addr.name, addr.addr)
    else
      addr.raw = string.format('<%s>', addr.addr)
    end
  end

  local specific_domains = {
    ['gmail.com'] = check_gmail_user,
    ['googlemail.com'] = check_googlemail,
  }

  if not email_addr then
    return
  end

  local handler = email_addr.domain and specific_domains[email_addr.domain]
  local new_user, tags, new_domain
  if handler then
    new_user, tags, new_domain = handler(email_addr)
  else
    new_user, tags, new_domain = check_address(email_addr)
  end

  if new_user or new_domain then
    set_addr(email_addr, new_user, new_domain)

    return new_user, tags
  end

  return nil
end
-
-- Tells whether the request behind `task` carries a `Password` header or a
-- `User-Agent` header equal to `rspamc`
-- @param {task} task task object providing `get_request_header`
-- @return {boolean} true if either condition holds
exports.is_rspamc_or_controller = function(task)
  local user_agent = task:get_request_header('User-Agent') or ''
  local password = task:get_request_header('Password')

  if password then
    return true
  end

  return tostring(user_agent) == 'rspamc'
end
-
--[[[
-- @function lua_util.unpack(table)
-- Converts numeric table to varargs
-- This is `unpack` on Lua 5.1/LuaJIT and `table.unpack` where that exists
-- @param {table} table numerically indexed table to unpack
-- @return {varargs} unpacked table elements
--]]

-- Prefer `table.unpack` and fall back to the global `unpack`
local unpack_function = table.unpack
if not unpack_function then
  unpack_function = unpack
end

exports.unpack = function(t)
  return unpack_function(t)
end
-
--[[[
-- @function lua_util.flatten(table)
-- Flatten underlying tables in a single table (one level deep)
-- @param {table} table table of tables
-- @return {table} flattened table
--]]
exports.flatten = function(t)
  local flat, count = {}, 0

  for _, inner in fun.iter(t) do
    for _, item in fun.iter(inner) do
      count = count + 1
      flat[count] = item
    end
  end

  return flat
end
-
--[[[
-- @function lua_util.spairs(table[, order[, lim]])
-- Like `pairs` but keys are visited in sorted order
-- @param {table} table table containing key/value pairs
-- @param {function} order optional comparator called as `order(table, a, b)`
-- for keys `a` and `b`; keys are sorted with plain `table.sort` when omitted
-- @param {number} lim optional maximum number of pairs to return
-- @return {function} generator function returning key/value pairs
-- @example
-- for k, v in spairs(t) do ... end
-- -- order by value instead of by key
-- for k, v in spairs(t, function(t, a, b) return t[a] < t[b] end) do ... end
--]]
local function spairs(t, order, lim)
  -- Snapshot the keys so they can be sorted
  local keys, count = {}, 0
  for k in pairs(t) do
    count = count + 1
    keys[count] = k
  end

  -- A nil comparator makes table.sort fall back to `<` on the keys
  local cmp
  if order then
    cmp = function(a, b)
      return order(t, a, b)
    end
  end
  table.sort(keys, cmp)

  local pos = 0
  return function()
    pos = pos + 1
    if lim and pos > lim then
      return
    end

    local key = keys[pos]
    if key then
      return key, t[key]
    end
  end
end
-
- exports.spairs = spairs
-
- local lua_cfg_utils = require "lua_cfg_utils"
-
- exports.config_utils = lua_cfg_utils
- exports.disable_module = lua_cfg_utils.disable_module
-
--[[[
-- @function lua_util.check_experimental(modname)
-- Checks experimental plugins state and disables the plugin if needed
-- @param {string} modname name of plugin to check
-- @return {boolean} true if plugin should be enabled, false otherwise
--]]
local function check_experimental(modname)
  if not rspamd_config:experimental_enabled() then
    -- Register the module as disabled with the `experimental` reason
    lua_cfg_utils.disable_module(modname, 'experimental')
    return false
  end

  return true
end

exports.check_experimental = check_experimental
-
--[[[
-- @function lua_util.list_to_hash(list)
-- Converts numerically-indexed table to table indexed by values
-- A table without a first element is returned as is (it is assumed to be a
-- hash already); other non-string values yield nothing.
-- @param {table} list numerically-indexed table or string, which is treated as a one-element list
-- @return {table} table indexed by values
-- @example
-- local h = lua_util.list_to_hash({"a", "b"})
-- -- h contains {a = true, b = true}
--]]
local function list_to_hash(list)
  local kind = type(list)

  if kind == 'string' then
    return { [list] = true }
  end

  if kind ~= 'table' then
    return
  end

  if not list[1] then
    return list
  end

  local hash = {}
  for _, elt in ipairs(list) do
    hash[elt] = true
  end

  return hash
end
-
- exports.list_to_hash = list_to_hash
-
--[[[
-- @function lua_util.nkeys(table|gen, param, state)
-- Returns number of keys in a table (i.e. from both the array and hash parts
-- combined). When `param` is given, the arguments are treated as an iterator
-- triplet and the number of produced elements is counted instead.
-- @param {table|function} gen table to count or iterator generator
-- @param param iterator parameter (optional)
-- @param state iterator state (optional)
-- @return {number} number of keys
-- @example
-- print(lua_util.nkeys({})) -- 0
-- print(lua_util.nkeys({ "a", nil, "b" })) -- 2
-- print(lua_util.nkeys({ dog = 3, cat = 4, bird = nil })) -- 2
-- print(lua_util.nkeys({ "a", dog = 3, cat = 4 })) -- 3
--
--]]
local function nkeys(gen, param, state)
  local count = 0

  if param then
    for _ in fun.iter(gen, param, state) do
      count = count + 1
    end
  else
    for _ in pairs(gen) do
      count = count + 1
    end
  end

  return count
end
-
- exports.nkeys = nkeys
-
--[[[
-- @function lua_util.parse_time_interval(str)
-- Parses human readable time interval
-- Accepts 's' for seconds, 'm' for minutes, 'h' for hours, 'd' for days,
-- 'w' for weeks, 'y' for years (365 days). The suffix is optional.
-- Parsing is anchored at the start of the string only.
-- @param {string} str input string
-- @return {number|nil} parsed interval as seconds (might be fractional)
--]]
local function parse_time_interval(str)
  local seconds_per_unit = {
    s = 1,
    m = 60,
    h = 3600,
    d = 86400,
    w = 86400 * 7,
    y = 365 * 86400,
  }

  local function unit_to_seconds(unit)
    return seconds_per_unit[unit]
  end

  local function multiply(acc, val)
    return acc * val
  end

  local sign = lpeg.S("+-")
  local digits = lpeg.R("09") ^ 1
  local integer = (sign ^ -1) * digits
  local fractional = lpeg.P(".") * digits
  -- Either `[sign]int[.frac]` or `sign.frac`
  local number = (integer * (fractional ^ -1)) + (sign * fractional)
  local unit = lpeg.S("smhdwy") / unit_to_seconds

  -- Fold: 1 * <number> [* <unit multiplier>]
  local time = lpeg.Cf(lpeg.Cc(1) * (number / tonumber) * (unit ^ -1), multiply)

  local seconds = lpeg.match(time, str)

  return seconds
end

exports.parse_time_interval = parse_time_interval
-
--[[[
-- @function lua_util.dehumanize_number(str)
-- Parses human readable number
-- Accepts 'k' for thousands, 'm' for millions, 'g' for billions, 'b' suffix for 1024 multiplier,
-- e.g. `10mb` equal to `10 * 1024 * 1024`
-- Suffixes are lowercase only and optional. Parsing is anchored at the start
-- of the string only.
-- @param {string} str input string
-- @return {number|nil} parsed number
--]]
local function dehumanize_number(str)
  local multipliers = {
    k = 1000,
    m = 1000000,
    g = 1e9,
    kb = 1024,
    mb = 1024 * 1024,
    -- Was 1024 * 1024 (same as `mb`), which under-reported by a factor of 1024
    gb = 1024 * 1024 * 1024,
  }

  local function parse_suffix(s)
    return multipliers[s]
  end

  local digit = lpeg.R("09")
  local parser = {}
  parser.integer = (lpeg.S("+-") ^ -1) *
      (digit ^ 1)
  parser.fractional = (lpeg.P(".")) *
      (digit ^ 1)
  -- Either `[sign]int[.frac]` or `sign.frac`
  parser.number = (parser.integer *
      (parser.fractional ^ -1)) +
      (lpeg.S("+-") * parser.fractional)
  -- Fold: 1 * <number> [* <suffix multiplier>]
  parser.humanized_number = lpeg.Cf(lpeg.Cc(1) *
      (parser.number / tonumber) *
      (((lpeg.S("kmg") * (lpeg.P("b") ^ -1)) / parse_suffix) ^ -1),
      function(acc, val)
        return acc * val
      end)

  local t = lpeg.match(parser.humanized_number, str)

  return t
end

exports.dehumanize_number = dehumanize_number
-
--[[[
-- @function lua_util.table_cmp(t1, t2)
-- Compare two tables deeply. Non-table values are compared with `==`.
-- Keys that are tables themselves are matched structurally. Tables that were
-- already visited are only checked for being paired with the same
-- counterpart, which keeps cyclic structures from recursing forever.
-- @param t1 first value
-- @param t2 second value
-- @return {boolean} true if both values are deeply equal
--]]
local function table_cmp(table1, table2)
  -- Maps every visited table of the left side to its right-side partner
  local visited = {}

  local function compare(a, b)
    local a_type = type(a)
    if a_type ~= type(b) then
      return false
    end
    if a_type ~= "table" then
      return a == b
    end

    local partner = visited[a]
    if partner then
      return partner == b
    end
    visited[a] = b

    -- Keys of `b` that were not matched yet, plus the subset that are tables
    local unmatched = {}
    local b_table_keys = {}
    for kb in pairs(b) do
      if type(kb) == "table" then
        b_table_keys[#b_table_keys + 1] = kb
      end
      unmatched[kb] = true
    end

    for ka, va in pairs(a) do
      if type(ka) == "table" then
        -- if key is a table, we need to find an equivalent one.
        local matched = false
        for idx, kb in ipairs(b_table_keys) do
          if table_cmp(ka, kb) and compare(va, b[kb]) then
            table.remove(b_table_keys, idx)
            unmatched[kb] = nil
            matched = true
            break
          end
        end
        if not matched then
          return false
        end
      else
        local vb = b[ka]
        -- `a` has a key which `b` doesn't have, fail.
        if vb == nil then
          return false
        end
        unmatched[ka] = nil
        if not compare(va, vb) then
          return false
        end
      end
    end

    -- if `b` has a key which `a` doesn't have, fail.
    if next(unmatched) then
      return false
    end

    return true
  end

  return compare(table1, table2)
end
-
- exports.table_cmp = table_cmp
-
--[[[
-- @function lua_util.table_merge(t1, t2)
-- Merge two tables into a new one. Values stored under numeric keys are
-- appended with fresh consecutive indices; values under any other key are
-- copied as is, so `t2` wins on conflicts.
-- @param {table} t1 first table
-- @param {table} t2 second table
-- @return {table} merged table
--]]
local function table_merge(t1, t2)
  local merged = {}
  local next_idx = 1 -- for numeric indices

  local function absorb(src)
    for k, v in pairs(src) do
      if type(k) == 'number' then
        merged[next_idx] = v
        next_idx = next_idx + 1
      else
        merged[k] = v
      end
    end
  end

  absorb(t1)
  absorb(t2)

  return merged
end
-
- exports.table_merge = table_merge
-
--[[[
-- @function lua_util.fold_header(task, name, value, stop_chars)
-- Performs header folding. The newline style is `lf` for tasks carrying the
-- `milter` flag and the task's own newline type otherwise.
-- @param {task} task task object
-- @param {string} name header name
-- @param {string} value header value
-- @param {string} stop_chars passed through to `rspamd_util.fold_header`
-- @return result of `rspamd_util.fold_header`
--]]
exports.fold_header = function(task, name, value, stop_chars)
  local how = task:has_flag("milter") and "lf" or task:get_newlines_type()

  return rspamd_util.fold_header(name, value, how, stop_chars)
end
-
--[[[
-- @function lua_util.override_defaults(defaults, override)
-- Overrides values from defaults with override. Nested tables present on
-- both sides are merged recursively. Neither input is modified.
-- @param {table} defaults default values
-- @param {table} override values taking precedence
-- @return {table} merged table; `defaults` itself when override is not a
-- table, `override` itself when defaults is not a table
--]]
local function override_defaults(def, override)
  -- Corner cases
  if type(override) ~= 'table' then
    return def
  end
  if type(def) ~= 'table' then
    return override
  end

  local merged = {}

  for key, value in pairs(override) do
    local base = def[key]
    if type(value) == 'table' and type(base) == 'table' then
      -- Recursively override elements
      merged[key] = override_defaults(base, value)
    else
      merged[key] = value
    end
  end

  -- Fill in whatever the override did not mention
  for key, value in pairs(def) do
    if merged[key] == nil then
      merged[key] = value
    end
  end

  return merged
end
-
- exports.override_defaults = override_defaults
-
--[[[
-- @function lua_util.filter_specific_urls(urls, params)
-- params: {
-- - task - if needed to save in the cache
-- - limit <int> (default = 9999)
-- - esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain)
--   works only if number of unique eSLD less than `limit`
-- - need_emails <bool> (default = false)
-- - need_images <bool> (default = false)
-- - need_content <bool> (default = false)
-- - filter <callback> (default = nil)
-- - prefix <string> cache prefix (default = nil)
-- - ignore_ip <bool> skip urls with the `numeric` flag
-- - ignore_redirected <bool> replace a redirecting url with its target
-- - no_cache <bool> do not use the task cache
-- }
-- Apply heuristic in extracting of urls from `urls` table, this function
-- tries its best to extract specific number of urls from a task based on
-- their characteristics
-- @param {table} urls list of url objects (nil yields an empty table)
-- @param {table} params see above
-- @return {table} list of at most `limit` selected urls
--]]
exports.filter_specific_urls = function(urls, params)
  local use_cache = params.task and not params.no_cache
  -- Apply the documented defaults so that a direct call without them works
  local limit = params.limit or 9999
  local esld_limit = params.esld_limit or 9999
  local cache_key

  if use_cache then
    if params.prefix then
      cache_key = params.prefix
    else
      cache_key = string.format('sp_urls_%d%s%s%s', limit,
          tostring(params.need_emails or false),
          tostring(params.need_images or false),
          tostring(params.need_content or false))
    end
    local cached = params.task:cache_get(cache_key)

    if cached then
      return cached
    end
  end

  if not urls then
    return {}
  end

  if params.filter then
    urls = fun.totable(fun.filter(params.filter, urls))
  end

  -- Candidates grouped by eSLD and by tld; each entry is
  -- { string form, url, priority }
  local tlds = {}
  local eslds = {}

  -- Selected urls keyed by their string form (deduplicates the result)
  local res = {}
  local nres = 0

  local function insert_url(str, u)
    if not res[str] then
      res[str] = u
      nres = nres + 1

      return true
    end

    return false
  end

  -- Turns the selection into a list and stores it in the task cache if allowed
  local function finalize()
    local list = exports.values(res)
    if use_cache then
      params.task:cache_set(cache_key, list)
    end
    return list
  end

  local function process_single_url(u, default_priority)
    local priority = default_priority or 1 -- Normal priority
    local flags = u:get_flags()
    if params.ignore_ip and flags.numeric then
      return
    end

    if flags.redirected then
      local redir = u:get_redirected() -- get the real url

      -- Guard against a missing redirect target
      if redir then
        if params.ignore_redirected then
          -- Replace `u` with redir
          -- NOTE(review): `flags` below still describe the original url
          u = redir
          priority = 2
        else
          -- Process both redirected url and the original one
          process_single_url(redir, 2)
        end
      end
    end

    if flags.image then
      if not params.need_images then
        -- Ignore url
        return
      else
        -- Penalise images in urls
        priority = 0
      end
    end

    local esld = u:get_tld()
    local str_hash = tostring(u)

    if esld then
      -- Special cases
      if (u:get_protocol() ~= 'mailto') and (not flags.html_displayed) then
        if flags.obscured then
          priority = 3
        else
          if (flags.has_user or flags.has_port) then
            priority = 2
          elseif (flags.subject or flags.phished) then
            priority = 2
          end
        end
      elseif flags.html_displayed then
        priority = 0
      end

      if not eslds[esld] then
        eslds[esld] = { { str_hash, u, priority } }
      else
        if #eslds[esld] < esld_limit then
          table.insert(eslds[esld], { str_hash, u, priority })
        end
      end

      -- eSLD - 1 part => tld
      local parts = rspamd_str_split(esld, '.')
      local tld = table.concat(fun.totable(fun.tail(parts)), '.')

      if not tlds[tld] then
        tlds[tld] = { { str_hash, u, priority } }
      else
        table.insert(tlds[tld], { str_hash, u, priority })
      end
    end
  end

  for _, u in ipairs(urls) do
    process_single_url(u)
  end

  limit = limit - nres
  if limit < 0 then
    limit = 0
  end

  if limit == 0 then
    return finalize()
  end

  -- Sort eSLDs and tlds
  local function sort_stuff(tbl)
    -- Sort according to max priority
    table.sort(tbl, function(e1, e2)
      -- Sort by priority so max priority is at the end
      table.sort(e1, function(tr1, tr2)
        return tr1[3] < tr2[3]
      end)
      table.sort(e2, function(tr1, tr2)
        return tr1[3] < tr2[3]
      end)

      if e1[#e1][3] ~= e2[#e2][3] then
        -- Sort by priority so max priority is at the beginning
        return e1[#e1][3] > e2[#e2][3]
      else
        -- Prefer less urls to more urls per esld
        return #e1 < #e2
      end

    end)

    return tbl
  end

  -- Takes the best remaining url from every group in turn. Only urls that
  -- are actually added consume the limit; returns false when all groups are
  -- exhausted
  local function take_round(groups)
    local item_found = false

    for _, lurls in ipairs(groups) do
      if limit <= 0 then
        break
      end
      if #lurls > 0 then
        local last = table.remove(lurls)
        if insert_url(last[1], last[2]) then
          limit = limit - 1
        end
        item_found = true
      end
    end

    return item_found
  end

  eslds = sort_stuff(exports.values(eslds))

  if #eslds <= limit then
    -- Number of eslds <= limit: round-robin over eSLDs
    repeat
      local item_found = take_round(eslds)
    until limit <= 0 or not item_found

    return finalize()
  end

  tlds = sort_stuff(exports.values(tlds))

  -- Too many eslds: round-robin over tlds instead; stop when nothing is left
  while limit > 0 do
    if not take_round(tlds) then
      break
    end
  end

  return finalize()
end
-
--[[[
-- @function lua_util.extract_specific_urls(params)
-- params: {
-- - task
-- - limit <int> (default = 9999)
-- - esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain)
--   works only if number of unique eSLD less than `limit`
-- - need_emails <bool> (default = false)
-- - filter <callback> (default = nil)
-- - prefix <string> cache prefix (default = nil)
-- - ignore_ip <bool> (default = false)
-- - ignore_redirected <bool> (default = false)
-- - need_images <bool> (default = false)
-- - need_content <bool> (default = false)
-- - no_cache <bool> (default = false)
-- - flags, flags_mode - passed to `task:get_urls` (optional)
-- }
-- Apply heuristic in extracting of urls from task, this function
-- tries its best to extract specific number of urls from a task based on
-- their characteristics
-- The deprecated positional form `(task, limit, need_emails, filter, prefix)`
-- is still accepted. Missing defaults are written into the params table.
--]]
exports.extract_specific_urls = function(params_or_task, lim, need_emails, filter, prefix)
  local params
  if type(params_or_task) == 'table' and lim == nil then
    params = params_or_task
  else
    -- Deprecated call
    params = {
      task = params_or_task,
      limit = lim,
      need_emails = need_emails,
      filter = filter,
      prefix = prefix
    }
  end

  -- `filter` and `prefix` default to nil, so they need no entry here
  local default_params = {
    limit = 9999,
    esld_limit = 9999,
    need_emails = false,
    need_images = false,
    need_content = false,
    ignore_ip = false,
    ignore_redirected = false,
    no_cache = false,
  }
  for name, default in pairs(default_params) do
    if params[name] == nil then
      params[name] = default
    end
  end

  -- Shortcut for cached stuff
  if params.task and not params.no_cache then
    local cache_key = params.prefix
    if not cache_key then
      local suffix
      if params.flags then
        suffix = table.concat(params.flags) .. (params.flags_mode or '')
      else
        suffix = string.format('%s%s%s',
            tostring(params.need_emails or false),
            tostring(params.need_images or false),
            tostring(params.need_content or false))
      end
      cache_key = string.format('sp_urls_%d%s', params.limit, suffix)
    end

    local cached = params.task:cache_get(cache_key)
    if cached then
      return cached
    end
  end

  -- No cache version
  local urls = params.task:get_urls({
    emails = params.need_emails,
    images = params.need_images,
    content = params.need_content,
    flags = params.flags, -- maybe nil
    flags_mode = params.flags_mode, -- maybe nil
  })

  return exports.filter_specific_urls(urls, params)
end
-
--[[[
-- @function lua_util.deepcopy(table)
-- Performs deep copy of the table, including keys and metatables.
-- Every source table is copied exactly once, so cyclic structures,
-- self-referential metatables (`mt.__index = mt`) and subtables shared
-- between several fields are reproduced with the same shape instead of
-- recursing forever or being duplicated.
-- @param orig value to copy; non-table values are returned as is
-- @return copy of the value
--]]
local function deepcopy(orig)
  -- Maps each source table to its copy
  local copies = {}

  local function copy_value(value)
    if type(value) ~= 'table' then
      -- number, string, boolean, etc
      return value
    end

    local existing = copies[value]
    if existing ~= nil then
      return existing
    end

    local copy = {}
    -- Register before descending so that back-references resolve to `copy`
    copies[value] = copy

    for orig_key, orig_value in next, value, nil do
      copy[copy_value(orig_key)] = copy_value(orig_value)
    end

    local mt = getmetatable(value)
    -- A `__metatable` field may make getmetatable return a non-table value
    if type(mt) == 'table' then
      setmetatable(copy, copy_value(mt))
    end

    return copy
  end

  return copy_value(orig)
end
-
- exports.deepcopy = deepcopy
-
--[[[
-- @function lua_util.deepsort(table[, sort_func])
-- Performs recursive in-place sort of a table: the sequence part of the
-- table is sorted with `table.sort`, then every value is processed the same
-- way using the same comparator. Non-table values are left untouched.
-- @param {table} table table to sort
-- @param {function} sort_func optional comparator for `table.sort`
--]]

-- Default ordering: values of the same type are compared with `<`, values of
-- different types are ordered by their type name
local function default_sort_cmp(e1, e2)
  if type(e1) == type(e2) then
    return e1 < e2
  else
    return type(e1) < type(e2)
  end
end

local function deepsort(tbl, sort_func)
  if type(tbl) ~= 'table' then
    return
  end

  table.sort(tbl, sort_func or default_sort_cmp)

  for _, value in next, tbl, nil do
    -- Propagate the caller's comparator; it used to be dropped here, so
    -- nested tables were silently sorted with the default ordering
    deepsort(value, sort_func)
  end
end
-
- exports.deepsort = deepsort
-
--[[[
-- @function lua_util.shallowcopy(tbl)
-- Performs shallow (and fast) copy of a table or another Lua type
-- Nested tables are shared with the original; the metatable is not copied.
-- @param orig value to copy
-- @return new table with the same key/value pairs, or `orig` itself when it
-- is not a table
--]]
exports.shallowcopy = function(orig)
  if type(orig) ~= 'table' then
    return orig
  end

  local copy = {}
  for key, value in pairs(orig) do
    copy[key] = value
  end

  return copy
end
-
-- Debugging support: module-level state shared by the debug helpers below
local logger = require("rspamd_logger")
-- Level flags passed to logger.logx: debug + forced (1 << 7 | 1 << 8)
local log_level = 384
-- True when the logger already reports the 'debug' level at load time
local unconditional_debug = (logger.log_level() == 'debug')
-- Set of module names that have debug logging enabled (name -> true)
local debug_modules = {}
-- Registered aliases (alias name -> module name)
local debug_aliases = {}
-
-
--[[[
-- @function lua_util.init_debug_logging(config)
-- Fills debug modules from the `logging` section of the config. Does nothing
-- when unconditional debug is already enabled or when the section is absent.
-- Enables unconditional debug if `level` is 'debug', enables every module
-- listed in `debug_modules`, and then enables every registered alias whose
-- target module has debug enabled.
-- @param {rspamd_config} config configuration object
--]]
exports.init_debug_logging = function(config)
  if unconditional_debug then
    return
  end

  local log_config = config:get_all_opt('logging')
  if not log_config then
    return
  end

  if log_config.level == 'debug' then
    unconditional_debug = true
  end

  if log_config.debug_modules then
    for _, m in ipairs(log_config.debug_modules) do
      debug_modules[m] = true
      logger.infox(config, 'enable debug for Lua module %s', m)
    end
  end

  -- `debug_aliases` is keyed by alias name, so its length operator always
  -- yields 0 and cannot be used as an emptiness check: just iterate it
  for alias, mod in pairs(debug_aliases) do
    if debug_modules[mod] then
      debug_modules[alias] = true
      logger.infox(config, 'enable debug for Lua module %s (%s aliased)',
          alias, mod)
    end
  end
end
-
-- Turns on debug logging for all modules regardless of per-module settings
function exports.enable_debug_logging()
  unconditional_debug = true
end
-
-- Enables debug logging for every module name passed as an argument
-- (arguments are walked with ipairs, so the walk stops at the first nil)
function exports.enable_debug_modules(...)
  local requested = { ... }
  for _, mod_name in ipairs(requested) do
    debug_modules[mod_name] = true
  end
end
-
-- Turns off unconditional debug logging; modules enabled individually in
-- `debug_modules` are not affected
function exports.disable_debug_logging()
  unconditional_debug = false
end
-
--[[[
-- @function lua_util.debugm(module, [log_object], format, ...)
-- Performs fast debug log for a specific module: nothing is logged unless
-- debug is enabled unconditionally or for `module`.
-- When the second argument is a string it is treated as the format and an
-- empty string is passed to the logger in place of the log object.
--]]
function exports.debugm(mod, obj_or_fmt, fmt_or_something, ...)
  if not (unconditional_debug or debug_modules[mod]) then
    return
  end

  if type(obj_or_fmt) ~= 'string' then
    -- Log object supplied: the format is the next argument
    logger.logx(log_level, mod, obj_or_fmt, 2, fmt_or_something, ...)
  else
    -- No log object: `obj_or_fmt` is the format itself
    logger.logx(log_level, mod, '', 2, obj_or_fmt, fmt_or_something, ...)
  end
end
-
--[[[
-- @function lua_util.add_debug_alias(mod, alias)
-- Add debugging alias so logging to `alias` will be treated as logging to `mod`.
-- The alias is always remembered; if debug is already enabled for `mod`, it is
-- enabled for `alias` immediately as well.
--]]
function exports.add_debug_alias(mod, alias)
  debug_aliases[alias] = mod

  if not debug_modules[mod] then
    return
  end

  debug_modules[alias] = true
  logger.infox(rspamd_config, 'enable debug for Lua module %s (%s aliased)',
      alias, mod)
end
--[[[
-- @function lua_util.get_task_verdict(task)
-- Returns verdict for a task + score if certain, must be called from idempotent filters only.
-- Thin wrapper around `lua_verdict.get_default_verdict`.
-- Returns string:
-- * `spam`: if a message has a score over the reject threshold and has more than one positive rule
-- * `junk`: if a message has a score between the [add_header/rewrite subject] and reject thresholds and has more than two positive rules
-- * `passthrough`: if a message has been passed through some short-circuit rule
-- * `ham`: if a message has an overall score below the junk level **and** more than three negative rules, or a negative total score
-- * `uncertain`: all other cases
--]]
function exports.get_task_verdict(task)
  return require("lua_verdict").get_default_verdict(task)
end
-
--[[[
-- @function lua_util.maybe_obfuscate_string(subject, settings, prefix)
-- Obfuscate string if enabled in settings. Also checks utf8 validity - if
-- string is not valid utf8 then '???' is returned. Empty (or nil) string is
-- returned as is.
-- Supported settings:
-- * <prefix>_privacy = false - subject privacy is off
-- * <prefix>_privacy_alg = 'blake2' - default hash-algorithm to obfuscate subject
-- * <prefix>_privacy_prefix = 'obf' - prefix to show it's obfuscated
-- * <prefix>_privacy_length = 16 - cut the length of the hash; if 0 or false full hash is returned
-- @return obfuscated or validated subject
--]]
function exports.maybe_obfuscate_string(subject, settings, prefix)
  local hash = require 'rspamd_cryptobox_hash'

  if not subject or subject == '' then
    return subject
  end

  if not rspamd_util.is_valid_utf8(subject) then
    -- Invalid input is masked and never hashed
    return '???'
  end

  if not settings[prefix .. '_privacy'] then
    return subject
  end

  local digest = hash.create_specific(settings[prefix .. '_privacy_alg'] or 'blake2',
      subject)
  local hex = digest:hex()

  -- Optional truncation of the hex digest
  local max_len = settings[prefix .. '_privacy_length']
  if max_len and max_len > 0 then
    hex = hex:sub(1, max_len)
  end

  -- Optional marker that tells the value is obfuscated
  local tag = settings[prefix .. '_privacy_prefix']
  if tag and #tag > 0 then
    return tag .. ':' .. hex
  end

  return hex
end
-
--[[[
-- @function lua_util.callback_from_string(str)
-- Converts a string like `return function(...) end` to lua function and return true and this function
-- or returns false + error message.
-- Accepted forms: `return function ...` (evaluated as is), `function(...) ... end`
-- (prefixed with `return`), or a plain sequence of statements (wrapped into
-- `return function(...) ... end`).
-- @param {string} s source text of the callback
-- @return status code and function object or an error message
--]]
exports.callback_from_string = function(s)
  local loadstring = loadstring or load

  -- Reject non-strings as well: the length operator would throw on them
  if type(s) ~= 'string' or #s == 0 then
    return false, 'invalid or empty string'
  end

  s = exports.rspamd_str_trim(s)
  local inp

  if s:match('^return%s*function') then
    -- 'return function', can be evaluated directly
    inp = s
  elseif s:match('^function%s*%(') then
    inp = 'return ' .. s
  else
    -- Just a plain sequence
    inp = 'return function(...)\n' .. s .. '; end'
  end

  -- Compile separately from the call: on a syntax error loadstring returns
  -- nil + message, and feeding that to pcall would replace the real message
  -- with "attempt to call a nil value"
  local chunk, compile_err = loadstring(inp)
  if not chunk then
    return false, compile_err
  end

  local ok, res_or_err = pcall(chunk)
  if not ok then
    return false, res_or_err
  end

  if type(res_or_err) ~= 'function' then
    return false, string.format('string evaluates to %s, not to a function',
        type(res_or_err))
  end

  return true, res_or_err
end
-
--[[[
-- @function lua_util.keys(t)
-- Returns all keys from a specific table
-- @param {table} t input table (or iterator triplet: when the second argument
-- is set, all three arguments are passed to `fun.iter`)
-- @return array of keys, in iteration order
--]]
function exports.keys(gen, param, state)
  local result = {}
  local n = 0

  if not param then
    -- Plain table: walk it with pairs
    for key in pairs(gen) do
      n = n + 1
      rawset(result, n, key)
    end
  else
    -- Iterator triplet: take the first value of each step
    for key in fun.iter(gen, param, state) do
      n = n + 1
      rawset(result, n, key)
    end
  end

  return result
end
-
--[[[
-- @function lua_util.values(t)
-- Returns all values from a specific table
-- @param {table} t input table (or iterator triplet: when the second argument
-- is set, all three arguments are passed to `fun.iter`)
-- @return array of values, in iteration order
--]]
function exports.values(gen, param, state)
  local result = {}
  local n = 0

  if not param then
    -- Plain table: walk it with pairs
    for _, value in pairs(gen) do
      n = n + 1
      rawset(result, n, value)
    end
  else
    -- Iterator triplet: take the second value of each step
    for _, value in fun.iter(gen, param, state) do
      n = n + 1
      rawset(result, n, value)
    end
  end

  return result
end
-
--[[[
-- @function lua_util.distance_sorted(t1, t2)
-- Returns distance between two sorted tables t1 and t2.
-- Both arrays are walked with one cursor each for at most max(#t1, #t2)
-- steps: equal elements advance both cursors, otherwise the cursor that points
-- to the smaller element advances and the distance grows by one. When one
-- array is exhausted, the length difference (corrected by the cursor offset)
-- is added and the walk stops.
-- @param {table} t1 input table
-- @param {table} t2 input table
-- @return distance between `t1` and `t2`
--]]
function exports.distance_sorted(t1, t2)
  local len1, len2 = #t1, #t2
  local max_len = math.max(len1, len2)
  local distance = 0
  local p1, p2 = 1, 1

  for _ = 1, max_len do
    if p2 > len2 then
      -- t2 is exhausted
      distance = distance + max_len - len2
      if p1 > p2 then
        distance = distance - (p1 - p2)
      end
      break
    end

    if p1 > len1 then
      -- t1 is exhausted
      distance = distance + max_len - len1
      if p2 > p1 then
        distance = distance - (p2 - p1)
      end
      break
    end

    local left, right = t1[p1], t2[p2]
    if left == right then
      p1 = p1 + 1
      p2 = p2 + 1
    elseif left < right then
      p1 = p1 + 1
      distance = distance + 1
    else
      p2 = p2 + 1
      distance = distance + 1
    end
  end

  return distance
end
-
--[[[
-- @function lua_util.table_digest(t)
-- Returns hash of all values if t[1] is set, or of all keys/values otherwise.
-- Array mode: every element is hashed via `tostring`, nested tables via their
-- own digest. Map mode: every key is hashed via `tostring`; string values and
-- nested tables are hashed too, values of other types are skipped.
-- NOTE: map mode feeds the hash in `pairs` order, which Lua leaves
-- unspecified, so the digest of a map depends on that order.
-- @param {table} t input array or map
-- @return {string} base32 representation of the hash of all strings (blake2b
-- according to the original documentation)
--]]
local function table_digest(t)
  local hasher = require("rspamd_cryptobox_hash").create()

  if not t[1] then
    -- Map mode
    for key, value in pairs(t) do
      hasher:update(tostring(key))

      local value_type = type(value)
      if value_type == 'table' then
        hasher:update(table_digest(value))
      elseif value_type == 'string' then
        hasher:update(value)
      end
    end
  else
    -- Array mode
    for _, item in ipairs(t) do
      if type(item) ~= 'table' then
        hasher:update(tostring(item))
      else
        hasher:update(table_digest(item))
      end
    end
  end

  return hasher:base32()
end

exports.table_digest = table_digest
-
--[[[
-- @function lua_util.toboolean(v)
-- Converts a string or a number to boolean.
-- Strings '1', 'true', 'TRUE', 'True' give true; '0', 'false', 'FALSE',
-- 'False' give false. Numbers give `v ~= 0`. Booleans are returned as is.
-- Anything else gives false plus an error message.
-- @param {string|number|boolean} v
-- @return {boolean} v converted to boolean; on failure `false` and an error message
--]]
do
  -- Lookup sets are built once at load time instead of on every call
  local true_strings = {
    ['1'] = true,
    ['true'] = true,
    ['TRUE'] = true,
    ['True'] = true,
  }
  local false_strings = {
    ['0'] = true,
    ['false'] = true,
    ['FALSE'] = true,
    ['False'] = true,
  }

  exports.toboolean = function(v)
    local v_type = type(v)

    if v_type == 'boolean' then
      return v
    elseif v_type == 'number' then
      return v ~= 0
    elseif v_type == 'string' then
      if true_strings[v] then
        return true
      elseif false_strings[v] then
        return false
      end

      return false, string.format('cannot convert %q to boolean', v)
    end

    -- `%q` accepts only strings on Lua 5.1/LuaJIT and would throw here,
    -- so format other types through tostring
    return false, string.format('cannot convert %s to boolean', tostring(v))
  end
end
-
--[[[
-- @function lua_util.config_check_local_or_authed(config, modname, def_local, def_authed)
-- Reads check_local and check_authed from the config as this is used in many modules.
-- The module section is consulted first; the `options` section is used only
-- when the module section defines neither setting as a boolean.
-- @param {rspamd_config} config `rspamd_config` global
-- @param {string} modname module name
-- @param {boolean} def_local default for check_local (false if omitted)
-- @param {boolean} def_authed default for check_authed (false if omitted)
-- @return {table} array of two booleans: { check_local, check_authed }
--]]
function exports.config_check_local_or_authed(rspamd_config, modname, def_local, def_authed)
  local result = { def_local or false, def_authed or false }

  -- Applies boolean overrides found in section `where`;
  -- tells whether at least one of them was present
  local function apply_section(where)
    local opts = rspamd_config:get_all_opt(where)
    if type(opts) ~= 'table' then
      return false
    end

    local found = false
    for idx, name in ipairs({ 'check_local', 'check_authed' }) do
      if type(opts[name]) == 'boolean' then
        result[idx] = opts[name]
        found = true
      end
    end

    return found
  end

  if not apply_section(modname) then
    apply_section('options')
  end

  return result
end
-
--[[[
-- @function lua_util.is_skip_local_or_authed(task, conf[, ip])
-- Returns `true` if local or authenticated task should be skipped for this module:
-- either authenticated checks are off (`conf[2]` is false) and the task has a
-- user, or local checks are off (`conf[1]` is false) and the ip is local.
-- @param {rspamd_task} task
-- @param {table} conf table returned from `config_check_local_or_authed`
-- (defaults to both checks being off)
-- @param {rspamd_ip} ip optional ip address (can be obtained from a task)
-- @return {boolean} true if check should be skipped
--]]
function exports.is_skip_local_or_authed(task, conf, ip)
  ip = ip or task:get_from_ip()
  conf = conf or { false, false }

  local check_local, check_authed = conf[1], conf[2]

  if not check_authed and task:get_user() then
    return true
  end

  if not check_local and type(ip) == 'userdata' and ip:is_local() then
    return true
  end

  return false
end
-
--[[[
-- @function lua_util.maybe_smtp_quote_value(str)
-- Checks string for the forbidden elements (tspecials in RFC 2045 plus
-- whitespace) and quotes the string if needed. Inside the quoted result both
-- backslashes and double quotes are escaped with a backslash, so the value can
-- be parsed back unambiguously.
-- @param {string} str input string
-- @return {string} original or quoted string
--]]
-- RFC 2045 tspecials, including '@', plus space, tab and vertical tab
local tspecial = lpeg.S "()<>@,;:\\\"/[]?= \t\v"
local special_match = lpeg.P((1 - tspecial) ^ 0 * tspecial ^ 1)
exports.maybe_smtp_quote_value = function(str)
  if special_match:match(str) then
    -- Escape `\` as well as `"`: an unescaped trailing backslash would
    -- otherwise swallow the closing quote. Assigning to a single local also
    -- drops the match count returned by gsub
    local escaped = str:gsub('[\\"]', '\\%0')
    return '"' .. escaped .. '"'
  end

  return str
end
-
--[[[
-- @function lua_util.shuffle(table)
-- Performs in-place shuffling of a table (Fisher-Yates): walking from the
-- last element down, each element is swapped with a randomly chosen element
-- at or before its own position.
-- @param {table} tbl table to shuffle
-- @return {table} same table
--]]
exports.shuffle = function(tbl)
  for i = #tbl, 2, -1 do
    -- Draw from 1..i, not from the whole table: drawing from 1..#tbl on every
    -- step does not produce all permutations with equal probability
    local j = math.random(i)
    tbl[i], tbl[j] = tbl[j], tbl[i]
  end

  return tbl
end
-
-- Lookup table that maps every two-digit hex pair (in any letter case,
-- including mixed case such as 'aB') to the byte it encodes
local hex_table = {}
for idx = 0, 255 do
  local byte = string.char(idx)
  local upper = ("%02X"):format(idx)
  local lower = ("%02x"):format(idx)
  hex_table[upper] = byte
  hex_table[lower] = byte
  hex_table[upper:sub(1, 1) .. lower:sub(2, 2)] = byte
  hex_table[lower:sub(1, 1) .. upper:sub(2, 2)] = byte
end

--[[[
-- @function lua_util.unhex(str)
-- Decode hex encoded string. The input is consumed in consecutive pairs of
-- characters; a trailing single character is kept as is.
-- @param {string} str string to decode
-- @return {string} hex decoded string (valid hex pairs are decoded, everything else is printed as is)
--]]
exports.unhex = function(str)
  -- Parentheses truncate gsub's (string, count) result to the string only
  return (str:gsub('(..)', hex_table))
end
-
-- Cache of upstream lists keyed by the url string they were created from
local http_upstream_lists = {}

--[[[
-- @function lua_util.http_upstreams_by_url(pool, url)
-- Returns a cached or new upstreams list that corresponds to the specific url.
-- The url is parsed with `rspamd_url.create`; the port defaults to 443 for
-- the `https` protocol and to 80 otherwise. Only successfully created lists
-- are cached.
-- @param {mempool} pool memory pool to use (typically static pool from rspamd_config)
-- @param {string} url full url
-- @return {upstreams_list} object to get upstream from an url, or nil if the
-- url cannot be parsed or the list cannot be created
--]]
local function http_upstreams_by_url(pool, url)
  local rspamd_url = require "rspamd_url"

  local known = http_upstream_lists[url]
  if known then
    return known
  end

  local parsed = rspamd_url.create(pool, url)
  if not parsed then
    return nil
  end

  local host = parsed:get_host()
  local proto = parsed:get_protocol() or 'http'
  local port = parsed:get_port()
  if not port then
    if proto == 'https' then
      port = 443
    else
      port = 80
    end
  end

  local upstreams = require("rspamd_upstream_list").create(host, port)
  if not upstreams then
    return nil
  end

  http_upstream_lists[url] = upstreams
  return upstreams
end

exports.http_upstreams_by_url = http_upstreams_by_url
-
--[[[
-- @function lua_util.dns_timeout_augmentation(cfg)
-- Returns an augmentation suitable to define DNS timeout for a module.
-- The timeout is read from `cfg:get_dns_timeout()`; 0.0 is used when that
-- call yields nothing.
-- @param {rspamd_config} cfg configuration object
-- @return {string} a string in format 'timeout=x' where `x` is a number of seconds for DNS timeout
--]]
local function dns_timeout_augmentation(cfg)
  local timeout = cfg:get_dns_timeout()
  if not timeout then
    timeout = 0.0
  end

  return string.format('timeout=%f', timeout)
end
-
- exports.dns_timeout_augmentation = dns_timeout_augmentation
-
--[[[
-- @function lua_util.strip_lua_comments(lua_code)
-- Strips single-line and multi-line comments from a given Lua code string and
-- removes any extra spaces or newlines.
-- The code is scanned left to right, so long-bracket comments of any level
-- are removed as a whole, and comment markers that occur inside quoted or
-- long-bracket strings are left alone. Every comment is replaced by a space
-- to keep the surrounding tokens apart.
-- NOTE: the final whitespace collapsing is applied to the whole result, i.e.
-- runs of whitespace inside string literals are collapsed as well.
--
-- @param lua_code The Lua code string to strip comments from.
-- @return The resulting Lua code string with comments and extra spaces removed.
--]]
local function strip_lua_comments(lua_code)
  local len = #lua_code
  local out = {}
  local pos = 1

  -- If a long bracket opens at `start`, returns the position right after its
  -- closing bracket (or after the end of input when unterminated), else nil
  local function long_bracket_end(start)
    local level = lua_code:match('^%[(=*)%[', start)
    if not level then
      return nil
    end

    local _, close_end = lua_code:find(']' .. level .. ']', start + #level + 2, true)
    return (close_end or len) + 1
  end

  while pos <= len do
    local ch = lua_code:sub(pos, pos)

    if lua_code:sub(pos, pos + 1) == '--' then
      -- Comment: long form if a long bracket follows, else up to end of line
      local after = long_bracket_end(pos + 2)
      if not after then
        after = lua_code:find('[\r\n]', pos + 2) or (len + 1)
      end
      out[#out + 1] = ' '
      pos = after
    elseif ch == '"' or ch == "'" then
      -- Quoted string: copy verbatim up to the matching unescaped quote
      local i = pos + 1
      while i <= len do
        local c = lua_code:sub(i, i)
        if c == '\\' then
          i = i + 2
        elseif c == ch then
          break
        else
          i = i + 1
        end
      end
      i = math.min(i, len)
      out[#out + 1] = lua_code:sub(pos, i)
      pos = i + 1
    elseif ch == '[' then
      -- Possibly a long string: copy it verbatim
      local after = long_bracket_end(pos)
      if after then
        out[#out + 1] = lua_code:sub(pos, after - 1)
        pos = after
      else
        out[#out + 1] = ch
        pos = pos + 1
      end
    else
      -- Ordinary code: copy up to the next character that needs a decision
      local next_special = lua_code:find('[%-"\'%[]', pos + 1) or (len + 1)
      out[#out + 1] = lua_code:sub(pos, next_special - 1)
      pos = next_special
    end
  end

  -- Remove extra spaces and newlines (parentheses drop the match count)
  return (table.concat(out):gsub("%s+", " "))
end
-
- exports.strip_lua_comments = strip_lua_comments
-
--[[[
-- @function lua_util.join_path(...)
-- Joins path components into a single path string using the directory
-- separator reported by `package.config` (its first character).
-- Components are concatenated as given: no separators are trimmed or
-- deduplicated.
--
-- @param ... Any number of path components to join together.
-- @return A single path string, with components separated by the separator.
--]]
local path_sep = package.config:sub(1, 1) or '/'
local function join_path(...)
  return table.concat({ ... }, path_sep)
end
- exports.join_path = join_path
-
-- Short load-time sanity check of join_path for the detected separator
if path_sep ~= '/' then
  assert(join_path('C:', 'path', 'to', 'file') == 'C:\\path\\to\\file')
else
  assert(join_path('/path', 'to', 'file') == '/path/to/file')
end
-
-- Named symbol priorities for common usage in prefilters/postfilters
exports.symbols_priorities = {
  -- Lowest named priority
  low = 0,
  -- Everything should use this as default
  medium = 5,
  -- Example: asn
  high = 9,
  -- Symbols must be executed first (or last), such as settings
  top = 10,
}
-
- return exports
|