aboutsummaryrefslogtreecommitdiffstats
path: root/lualib/lua_util.lua
blob: 8f24b28717ed0b83b5347a7c11f791849f594299 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
--[[
Copyright (c) 2017, Vsevolod Stakhov <vsevolod@highsecure.ru>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--

--[[[
-- @module lua_util
-- This module contains utility functions for working with Lua and/or Rspamd
--]]

local exports = {}
local lpeg = require 'lpeg'
local rspamd_util = require "rspamd_util"
local fun = require "fun"
local lupa = require "lupa"

local split_grammar = {}
local spaces_split_grammar
local space = lpeg.S' \t\n\v\f\r'
local nospace = 1 - space
local ptrim = space^0 * lpeg.C((space^0 * nospace^1)^0)
local match = lpeg.match

lupa.configure('{%', '%}', '{=', '=}', '{#', '#}', {
  keep_trailing_newline = true,
  autoescape = false,
})

lupa.filters.pbkdf = function(s)
  local cr = require "rspamd_cryptobox"
  return cr.pbkdf(s)
end

local function rspamd_str_split(s, sep)
  local gr
  if not sep then
    if not spaces_split_grammar then
      local _sep = space
      local elem = lpeg.C((1 - _sep)^0)
      local p = lpeg.Ct(elem * (_sep * elem)^0)
      spaces_split_grammar = p
    end

    gr = spaces_split_grammar
  else
    gr = split_grammar[sep]

    if not gr then
      local _sep
      if type(sep) == 'string' then
        _sep = lpeg.S(sep) -- Assume set
      else
        _sep = sep -- Assume lpeg object
      end
      local elem = lpeg.C((1 - _sep)^0)
      local p = lpeg.Ct(elem * (_sep * elem)^0)
      gr = p
      split_grammar[sep] = gr
    end
  end

  return gr:match(s)
end

--[[[
-- @function lua_util.str_split(text, deliminator)
-- Splits text into a numeric table by deliminator
-- @param {string} text deliminated text
-- @param {string} deliminator the deliminator
-- @return {table} numeric table containing string parts
--]]

exports.rspamd_str_split = rspamd_str_split
exports.str_split = rspamd_str_split

exports.rspamd_str_trim = function(s)
  return match(ptrim, s)
end

--[[[
-- @function lua_util.round(number, decimalPlaces)
-- Round number to fixed number of decimal points
-- @param {number} number number to round
-- @param {number} decimalPlaces number of decimal points
-- @return {number} rounded number
--]]

-- Robert Jay Gould http://lua-users.org/wiki/SimpleRound
exports.round = function(num, numDecimalPlaces)
  local mult = 10^(numDecimalPlaces or 0)
  return math.floor(num * mult) / mult
end

--[[[
-- @function lua_util.template(text, replacements)
-- Replaces values in a text template
-- Variable names can contain letters, numbers and underscores, are prefixed with `$` and may or not use curly braces.
-- @param {string} text text containing variables
-- @param {table} replacements key/value pairs for replacements
-- @return {string} string containing replaced values
-- @example
-- local goop = lua_util.template("HELLO $FOO ${BAR}!", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
-- -- goop contains "HELLO LUA WORLD!"
--]]

exports.template = function(tmpl, keys)
  local var_lit = lpeg.P { lpeg.R("az") + lpeg.R("AZ") + lpeg.R("09") + "_" }
  local var = lpeg.P { (lpeg.P("$") / "") * ((var_lit^1) / keys) }
  local var_braced = lpeg.P { (lpeg.P("${") / "") * ((var_lit^1) / keys) * (lpeg.P("}") / "") }

  local template_grammar = lpeg.Cs((var + var_braced + 1)^0)

  return lpeg.match(template_grammar, tmpl)
end

local function enrich_template_with_globals(env)
  local newenv = exports.shallowcopy(env)
  newenv.paths = rspamd_paths
  newenv.env = rspamd_env

  return newenv
end
--[[[
-- @function lua_util.jinja_template(text, env[, skip_global_env])
-- Replaces values in a text template according to jinja2 syntax
-- @param {string} text text containing variables
-- @param {table} replacements key/value pairs for replacements
-- @param {boolean} skip_global_env don't export Rspamd superglobals
-- @return {string} string containing replaced values
-- @example
-- lua_util.jinja_template("HELLO {{FOO}} {{BAR}}!", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
-- "HELLO LUA WORLD!"
--]]
exports.jinja_template = function(text, env, skip_global_env)
  if not skip_global_env then
    env = enrich_template_with_globals(env)
  end

  return lupa.expand(text, env)
end

--[[[
-- @function lua_util.jinja_file(filename, env[, skip_global_env])
-- Replaces values in a text template according to jinja2 syntax
-- @param {string} filename name of file to expand
-- @param {table} replacements key/value pairs for replacements
-- @param {boolean} skip_global_env don't export Rspamd superglobals
-- @return {string} string containing replaced values
-- @example
-- lua_util.jinja_template("HELLO {{FOO}} {{BAR}}!", {['FOO'] = 'LUA', ['BAR'] = 'WORLD'})
-- "HELLO LUA WORLD!"
--]]
exports.jinja_template_file = function(filename, env, skip_global_env)
  if not skip_global_env then
    env = enrich_template_with_globals(env)
  end

  return lupa.expand_file(filename, env)
end

exports.remove_email_aliases = function(email_addr)
  local function check_gmail_user(addr)
    -- Remove all points
    local no_dots_user = string.gsub(addr.user, '%.', '')
    local cap, pluses = string.match(no_dots_user, '^([^%+][^%+]*)(%+.*)$')
    if cap then
      return cap, rspamd_str_split(pluses, '+'), nil
    elseif no_dots_user ~= addr.user then
      return no_dots_user,{},nil
    end

    return nil
  end

  local function check_address(addr)
    if addr.user then
      local cap, pluses = string.match(addr.user, '^([^%+][^%+]*)(%+.*)$')
      if cap then
        return cap, rspamd_str_split(pluses, '+'), nil
      end
    end

    return nil
  end

  local function set_addr(addr, new_user, new_domain)
    if new_user then
      addr.user = new_user
    end
    if new_domain then
      addr.domain = new_domain
    end

    if addr.domain then
      addr.addr = string.format('%s@%s', addr.user, addr.domain)
    else
      addr.addr = string.format('%s@', addr.user)
    end

    if addr.name and #addr.name > 0 then
      addr.raw = string.format('"%s" <%s>', addr.name, addr.addr)
    else
      addr.raw = string.format('<%s>', addr.addr)
    end
  end

  local function check_gmail(addr)
    local nu, tags, nd = check_gmail_user(addr)

    if nu then
      return nu, tags, nd
    end

    return nil
  end

  local function check_googlemail(addr)
    local nd = 'gmail.com'
    local nu, tags = check_gmail_user(addr)

    if nu then
      return nu, tags, nd
    end

    return nil, nil, nd
  end

  local specific_domains = {
    ['gmail.com'] = check_gmail,
    ['googlemail.com'] = check_googlemail,
  }

  if email_addr then
    if email_addr.domain and specific_domains[email_addr.domain] then
      local nu, tags, nd = specific_domains[email_addr.domain](email_addr)
      if nu or nd then
        set_addr(email_addr, nu, nd)

        return nu, tags
      end
    else
      local nu, tags, nd = check_address(email_addr)
      if nu or nd then
        set_addr(email_addr, nu, nd)

        return nu, tags
      end
    end

    return nil
  end
end

exports.is_rspamc_or_controller = function(task)
  local ua = task:get_request_header('User-Agent') or ''
  local pwd = task:get_request_header('Password')
  local is_rspamc = false
  if tostring(ua) == 'rspamc' or pwd then is_rspamc = true end

  return is_rspamc
end

--[[[
-- @function lua_util.unpack(table)
-- Converts numeric table to varargs
-- This is `unpack` on Lua 5.1/5.2/LuaJIT and `table.unpack` on Lua 5.3
-- @param {table} table numerically indexed table to unpack
-- @return {varargs} unpacked table elements
--]]

local unpack_function = table.unpack or unpack
exports.unpack = function(t)
  return unpack_function(t)
end

--[[[
-- @function lua_util.flatten(table)
-- Flatten underlying tables in a single table
-- @param {table} table table of tables
-- @return {table} flattened table
--]]
exports.flatten = function(t)
  local res = {}
  for _,e in fun.iter(t) do
    for _,v in fun.iter(e) do
      res[#res + 1] = v
    end
  end

  return res
end

--[[[
-- @function lua_util.spairs(table)
-- Like `pairs` but keys are sorted lexicographically
-- @param {table} table table containing key/value pairs
-- @return {function} generator function returning key/value pairs
--]]

-- Sorted iteration:
-- for k,v in spairs(t) do ... end
--
-- or with custom comparison:
-- for k, v in spairs(t, function(t, a, b) return t[a] < t[b] end)
--
-- optional limit is also available (e.g. return top X elements)
local function spairs(t, order, lim)
  -- collect the keys
  local keys = {}
  for k in pairs(t) do keys[#keys+1] = k end

  -- if order function given, sort by it by passing the table and keys a, b,
  -- otherwise just sort the keys
  if order then
    table.sort(keys, function(a,b) return order(t, a, b) end)
  else
    table.sort(keys)
  end

  -- return the iterator function
  local i = 0
  return function()
    i = i + 1
    if not lim or i <= lim then
      if keys[i] then
        return keys[i], t[keys[i]]
      end
    end
  end
end

exports.spairs = spairs

--[[[
-- @function lua_util.disable_module(modname, how)
-- Disables a plugin
-- @param {string} modname name of plugin to disable
-- @param {string} how 'redis' to disable redis, 'config' to disable startup
--]]

local function disable_module(modname, how)
  if rspamd_plugins_state.enabled[modname] then
    rspamd_plugins_state.enabled[modname] = nil
  end

  if how == 'redis' then
    rspamd_plugins_state.disabled_redis[modname] = {}
  elseif how == 'config' then
    rspamd_plugins_state.disabled_unconfigured[modname] = {}
  elseif how == 'experimental' then
    rspamd_plugins_state.disabled_experimental[modname] = {}
  else
    rspamd_plugins_state.disabled_failed[modname] = {}
  end
end

exports.disable_module = disable_module

--[[[
-- @function lua_util.disable_module(modname)
-- Checks experimental plugins state and disable if needed
-- @param {string} modname name of plugin to check
-- @return {boolean} true if plugin should be enabled, false otherwise
--]]
local function check_experimental(modname)
  if rspamd_config:experimental_enabled() then
    return true
  else
    disable_module(modname, 'experimental')
  end

  return false
end

exports.check_experimental = check_experimental

--[[[
-- @function lua_util.list_to_hash(list)
-- Converts numerically-indexed table to table indexed by values
-- @param {table} list numerically-indexed table or string, which is treated as a one-element list
-- @return {table} table indexed by values
-- @example
-- local h = lua_util.list_to_hash({"a", "b"})
-- -- h contains {a = true, b = true}
--]]
local function list_to_hash(list)
  if type(list) == 'table' then
    if list[1] then
      local h = {}
      for _, e in ipairs(list) do
        h[e] = true
      end
      return h
    else
      return list
    end
  elseif type(list) == 'string' then
    local h = {}
    h[list] = true
    return h
  end
end

exports.list_to_hash = list_to_hash

--[[[
-- @function lua_util.parse_time_interval(str)
-- Parses human readable time interval
-- Accepts 's' for seconds, 'm' for minutes, 'h' for hours, 'd' for days,
-- 'w' for weeks, 'y' for years
-- @param {string} str input string
-- @return {number|nil} parsed interval as seconds (might be fractional)
--]]
local function parse_time_interval(str)
  local function parse_time_suffix(s)
    if s == 's' then
      return 1
    elseif s == 'm' then
      return 60
    elseif s == 'h' then
      return 3600
    elseif s == 'd' then
      return 86400
    elseif s == 'w' then
      return 86400 * 7
    elseif s == 'y' then
      return 365 * 86400;
    end
  end

  local digit = lpeg.R("09")
  local parser = {}
  parser.integer =
  (lpeg.S("+-") ^ -1) *
      (digit   ^  1)
  parser.fractional =
  (lpeg.P(".")   ) *
      (digit ^ 1)
  parser.number =
  (parser.integer *
      (parser.fractional ^ -1)) +
      (lpeg.S("+-") * parser.fractional)
  parser.time = lpeg.Cf(lpeg.Cc(1) *
      (parser.number / tonumber) *
      ((lpeg.S("smhdwy") / parse_time_suffix) ^ -1),
    function (acc, val) return acc * val end)

  local t = lpeg.match(parser.time, str)

  return t
end

exports.parse_time_interval = parse_time_interval

--[[[
-- @function lua_util.dehumanize_number(str)
-- Parses human readable number
-- Accepts 'k' for thousands, 'm' for millions, 'g' for billions, 'b' suffix for 1024 multiplier,
-- e.g. `10mb` equal to `10 * 1024 * 1024`
-- @param {string} str input string
-- @return {number|nil} parsed number
--]]
local function dehumanize_number(str)
  local function parse_suffix(s)
    if s == 'k' then
      return 1000
    elseif s == 'm' then
      return 1000000
    elseif s == 'g' then
      return 1e9
    elseif s == 'kb' then
      return 1024
    elseif s == 'mb' then
      return 1024 * 1024
    elseif s == 'gb' then
      return 1024 * 1024;
    end
  end

  local digit = lpeg.R("09")
  local parser = {}
  parser.integer =
  (lpeg.S("+-") ^ -1) *
      (digit   ^  1)
  parser.fractional =
  (lpeg.P(".")   ) *
      (digit ^ 1)
  parser.number =
  (parser.integer *
      (parser.fractional ^ -1)) +
      (lpeg.S("+-") * parser.fractional)
  parser.humanized_number = lpeg.Cf(lpeg.Cc(1) *
      (parser.number / tonumber) *
      (((lpeg.S("kmg") * (lpeg.P("b") ^ -1)) / parse_suffix) ^ -1),
      function (acc, val) return acc * val end)

  local t = lpeg.match(parser.humanized_number, str)

  return t
end

exports.dehumanize_number = dehumanize_number

--[[[
-- @function lua_util.table_cmp(t1, t2)
-- Compare two tables deeply
--]]
local function table_cmp(table1, table2)
  local avoid_loops = {}
  local function recurse(t1, t2)
    if type(t1) ~= type(t2) then return false end
    if type(t1) ~= "table" then return t1 == t2 end

    if avoid_loops[t1] then return avoid_loops[t1] == t2 end
    avoid_loops[t1] = t2
    -- Copy keys from t2
    local t2keys = {}
    local t2tablekeys = {}
    for k, _ in pairs(t2) do
      if type(k) == "table" then table.insert(t2tablekeys, k) end
      t2keys[k] = true
    end
    -- Let's iterate keys from t1
    for k1, v1 in pairs(t1) do
      local v2 = t2[k1]
      if type(k1) == "table" then
        -- if key is a table, we need to find an equivalent one.
        local ok = false
        for i, tk in ipairs(t2tablekeys) do
          if table_cmp(k1, tk) and recurse(v1, t2[tk]) then
            table.remove(t2tablekeys, i)
            t2keys[tk] = nil
            ok = true
            break
          end
        end
        if not ok then return false end
      else
        -- t1 has a key which t2 doesn't have, fail.
        if v2 == nil then return false end
        t2keys[k1] = nil
        if not recurse(v1, v2) then return false end
      end
    end
    -- if t2 has a key which t1 doesn't have, fail.
    if next(t2keys) then return false end
    return true
  end
  return recurse(table1, table2)
end

exports.table_cmp = table_cmp

--[[[
-- @function lua_util.table_cmp(task, name, value, stop_chars)
-- Performs header folding
--]]
exports.fold_header = function(task, name, value, stop_chars)

  local how

  if task:has_flag("milter") then
    how = "lf"
  else
    how = task:get_newlines_type()
  end

  return rspamd_util.fold_header(name, value, how, stop_chars)
end

--[[[
-- @function lua_util.override_defaults(defaults, override)
-- Overrides values from defaults with override
--]]
local function override_defaults(def, override)
  -- Corner cases
  if not override or type(override) ~= 'table' then
    return def
  end
  if not def or type(def) ~= 'table' then
    return override
  end

  local res = {}

  for k,v in pairs(override) do
    if type(v) == 'table' then
      if def[k] and type(def[k]) == 'table' then
        -- Recursively override elements
        res[k] = override_defaults(def[k], v)
      else
        res[k] = v
      end
    else
      res[k] = v
    end
  end

  for k,v in pairs(def) do
    if type(res[k]) == 'nil' then
      res[k] = v
    end
  end

  return res
end

exports.override_defaults = override_defaults

--[[[
-- @function lua_util.filter_specific_urls(urls, params)
-- params: {
- - task - if needed to save in the cache
- - limit <int> (default = 9999)
- - esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain)
                                      works only if number of unique eSLD less than `limit`
- - need_emails <bool> (default = false)
- - filter <callback> (default = nil)
- - prefix <string> cache prefix (default = nil)
-- }
-- Apply heuristic in extracting of urls from `urls` table, this function
-- tries its best to extract specific number of urls from a task based on
-- their characteristics
--]]
exports.filter_specific_urls = function (urls, params)
  local cache_key

  if params.task and not params.no_cache then
    if params.prefix then
      cache_key = params.prefix
    else
      cache_key = string.format('sp_urls_%d%s', params.limit,
          tostring(params.need_emails or false))
    end
    local cached = params.task:cache_get(cache_key)

    if cached then
      return cached
    end
  end

  if not urls then return {} end

  if params.filter then urls = fun.totable(fun.filter(params.filter, urls)) end

  if #urls <= params.limit and #urls <= params.esld_limit then
    if params.task and not params.no_cache then
      params.task:cache_set(cache_key, urls)
    end

    return urls
  end

  -- Filter by tld:
  local tlds = {}
  local eslds = {}
  local ntlds, neslds = 0, 0

  local res = {}
  local nres = 0

  local function insert_url(str, u)
    if not res[str] then
      res[str] = u
      nres = nres + 1

      return true
    end

    return false
  end

  local function process_single_url(u, default_priority)
    local priority = default_priority or 1 -- Normal priority
    local flags = u:get_flags()
    if params.ignore_ip and flags.numeric then
      return
    end

    if flags.redirected then
      local redir = u:get_redirected() -- get the real url

      if params.ignore_redirected then
        -- Replace `u` with redir
        u = redir
        priority = 2
      else
        -- Process both redirected url and the original one
        process_single_url(redir, 2)
      end
    end

    local esld = u:get_tld()
    local str_hash = tostring(u)

    if esld then
      -- Special cases
      if (u:get_protocol() ~= 'mailto') and (not flags.html_displayed) then
        if flags.obscured then
          priority = 3
        else
          if (flags.has_user or flags.has_port) then
            priority = 2
          elseif (flags.subject or flags.phished) then
            priority = 2
          end
        end
      elseif flags.html_displayed then
        priority = 0
      end

      if not eslds[esld] then
        eslds[esld] = {{str_hash, u, priority}}
        neslds = neslds + 1
      else
        if #eslds[esld] < params.esld_limit then
          table.insert(eslds[esld], {str_hash, u, priority})
        end
      end


      -- eSLD - 1 part => tld
      local parts = rspamd_str_split(esld, '.')
      local tld = table.concat(fun.totable(fun.tail(parts)), '.')

      if not tlds[tld] then
        tlds[tld] = {{str_hash, u, priority}}
        ntlds = ntlds + 1
      else
        table.insert(tlds[tld], {str_hash, u, priority})
      end
    end
  end

  for _,u in ipairs(urls) do
    process_single_url(u)
  end

  local limit = params.limit
  limit = limit - nres
  if limit < 0 then limit = 0 end

  if limit == 0 then
    res = exports.values(res)
    if params.task and not params.no_cache then
      params.task:cache_set(cache_key, res)
    end
    return res
  end

  -- Sort eSLDs and tlds
  local function sort_stuff(tbl)
    -- Sort according to max priority
    table.sort(tbl, function(e1, e2)
      -- Sort by priority so max priority is at the end
      table.sort(e1, function(tr1, tr2)
        return tr1[3] < tr2[3]
      end)
      table.sort(e2, function(tr1, tr2)
        return tr1[3] < tr2[3]
      end)

      if e1[#e1][3] ~= e2[#e2][3] then
        -- Sort by priority so max priority is at the beginning
        return e1[#e1][3] > e2[#e2][3]
      else
        -- Prefer less urls to more urls per esld
        return #e1 < #e2
      end

    end)

    return tbl
  end

  eslds = sort_stuff(exports.values(eslds))
  neslds = #eslds

  if neslds <= limit then
    -- Number of eslds < limit
    repeat
      local item_found = false

      for _,lurls in ipairs(eslds) do
        if #lurls > 0 then
          local last = table.remove(lurls)
          insert_url(last[1], last[2])
          limit = limit - 1
          item_found = true
        end
      end

    until limit <= 0 or not item_found

    res = exports.values(res)
    if params.task and not params.no_cache then
      params.task:cache_set(cache_key, res)
    end
    return res
  end

  tlds = sort_stuff(exports.values(tlds))
  ntlds = #tlds

  -- Number of tlds < limit
  while limit > 0 do
    for _,lurls in ipairs(tlds) do
      if #lurls > 0 then
        local last = table.remove(lurls)
        insert_url(last[1], last[2])
        limit = limit - 1
      end
      if limit == 0 then break end
    end
  end

  res = exports.values(res)
  if params.task and not params.no_cache then
    params.task:cache_set(cache_key, res)
  end
  return res
end

--[[[
-- @function lua_util.extract_specific_urls(params)
-- params: {
- - task
- - limit <int> (default = 9999)
- - esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain)
                                      works only if number of unique eSLD less than `limit`
- - need_emails <bool> (default = false)
- - filter <callback> (default = nil)
- - prefix <string> cache prefix (default = nil)
- - ignore_redirected <bool> (default = false)
-- }
-- Apply heuristic in extracting of urls from task, this function
-- tries its best to extract specific number of urls from a task based on
-- their characteristics
--]]
-- exports.extract_specific_urls = function(params_or_task, limit, need_emails, filter, prefix)
exports.extract_specific_urls = function(params_or_task, lim, need_emails, filter, prefix)
  local default_params = {
    limit = 9999,
    esld_limit = 9999,
    need_emails = false,
    filter = nil,
    prefix = nil,
    ignore_ip = false,
    ignore_redirected = false,
    no_cache = false,
  }

  local params
  if type(params_or_task) == 'table' and type(lim) == 'nil' then
    params = params_or_task
  else
    -- Deprecated call
    params = {
      task = params_or_task,
      limit = lim,
      need_emails = need_emails,
      filter = filter,
      prefix = prefix
    }
  end
  for k,v in pairs(default_params) do
    if not params[k] then params[k] = v end
  end

  local urls = params.task:get_urls(params.need_emails)

  return exports.filter_specific_urls(urls, params)
end

--[[[
-- @function lua_util.deepcopy(table)
-- params: {
- - table
-- }
-- Performs deep copy of the table. Including metatables
--]]
local function deepcopy(orig)
  local orig_type = type(orig)
  local copy
  if orig_type == 'table' then
    copy = {}
    for orig_key, orig_value in next, orig, nil do
      copy[deepcopy(orig_key)] = deepcopy(orig_value)
    end
    setmetatable(copy, deepcopy(getmetatable(orig)))
  else -- number, string, boolean, etc
    copy = orig
  end
  return copy
end

exports.deepcopy = deepcopy

--[[[
-- @function lua_util.shallowcopy(tbl)
-- Performs shallow (and fast) copy of a table or another Lua type
--]]
exports.shallowcopy = function(orig)
  local orig_type = type(orig)
  local copy
  if orig_type == 'table' then
    copy = {}
    for orig_key, orig_value in pairs(orig) do
      copy[orig_key] = orig_value
    end
  else
    copy = orig
  end
  return copy
end

-- Debugging support
local unconditional_debug = false
local debug_modules = {}
local debug_aliases = {}
local log_level = 384 -- debug + forced (1 << 7 | 1 << 8)


exports.init_debug_logging = function(config)
  local logger = require "rspamd_logger"
  -- Fill debug modules from the config
  local logging = config:get_all_opt('logging')
  if logging then
    local log_level_str = logging.level
    if log_level_str then
      if log_level_str == 'debug' then
        unconditional_debug = true
      end
    end

    if not unconditional_debug then
      if logging.debug_modules then
        for _,m in ipairs(logging.debug_modules) do
          debug_modules[m] = true
          logger.infox(config, 'enable debug for Lua module %s', m)
        end
      end

      if #debug_aliases > 0 then
        for alias,mod in pairs(debug_aliases) do
          if debug_modules[mod] then
            debug_modules[alias] = true
            logger.infox(config, 'enable debug for Lua module %s (%s aliased)',
                alias, mod)
          end
        end
      end
    end
  end
end

--[[[
-- @function lua_util.debugm(module, [log_object], format, ...)
-- Performs fast debug log for a specific module
--]]
exports.debugm = function(mod, obj_or_fmt, fmt_or_something, ...)
  local logger = require "rspamd_logger"
  if unconditional_debug or debug_modules[mod] then
    if type(obj_or_fmt) == 'string' then
      logger.logx(log_level, mod, '', 2, obj_or_fmt, fmt_or_something, ...)
    else
      logger.logx(log_level, mod, obj_or_fmt, 2, fmt_or_something, ...)
    end
  end
end

--[[[
-- @function lua_util.add_debug_alias(mod, alias)
-- Add debugging alias so logging to `alias` will be treated as logging to `mod`
--]]
exports.add_debug_alias = function(mod, alias)
  local logger = require "rspamd_logger"
  debug_aliases[alias] = mod

  if debug_modules[mod] then
    debug_modules[alias] = true
    logger.infox(rspamd_config, 'enable debug for Lua module %s (%s aliased)',
        alias, mod)
  end
end
---[[[
-- @function lua_util.get_task_verdict(task)
-- Returns verdict for a task + score if certain, must be called from idempotent filters only
-- Returns string:
-- * `spam`: if message have over reject threshold and has more than one positive rule
-- * `junk`: if a message has between score between [add_header/rewrite subject] to reject thresholds and has more than two positive rules
-- * `passthrough`: if a message has been passed through some short-circuit rule
-- * `ham`: if a message has overall score below junk level **and** more than three negative rule, or negative total score
-- * `uncertain`: all other cases
--]]
exports.get_task_verdict = function(task)
  local result = task:get_metric_result()

  if result then

    if result.passthrough then
      return 'passthrough',nil
    end

    local score = result.score

    local action = result.action

    if action == 'reject' and result.npositive > 1 then
      return 'spam',score
    elseif action == 'no action' then
      if score < 0 or result.nnegative > 3 then
        return 'ham',score
      end
    else
      -- All colors of junk
      if action == 'add header' or action == 'rewrite subject' then
        if result.npositive > 2 then
          return 'junk',score
        end
      end
    end

    return 'uncertain',score
  end
end

---[[[
-- @function lua_util.maybe_obfuscate_string(subject, settings, prefix)
-- Obfuscate string if enabled in settings. Also checks utf8 validity - if
-- string is not valid utf8 then '???' is returned. Empty string returned as is.
-- Supported settings:
-- * <prefix>_privacy = false - subject privacy is off
-- * <prefix>_privacy_alg = 'blake2' - default hash-algorithm to obfuscate subject
-- * <prefix>_privacy_prefix = 'obf' - prefix to show it's obfuscated
-- * <prefix>_privacy_length = 16 - cut the length of the hash; if 0 or fasle full hash is returned
-- @return obfuscated or validated subject
--]]

exports.maybe_obfuscate_string = function(subject, settings, prefix)
  local hash = require 'rspamd_cryptobox_hash'
  if not subject or subject == '' then
    return subject
  elseif not rspamd_util.is_valid_utf8(subject) then
    subject = '???'
  elseif settings[prefix .. '_privacy'] then
    local hash_alg = settings[prefix .. '_privacy_alg'] or 'blake2'
    local subject_hash = hash.create_specific(hash_alg, subject)

    local strip_len = settings[prefix .. '_privacy_length']
    if strip_len and strip_len > 0 then
      subject = subject_hash:hex():sub(1, strip_len)
    else
      subject = subject_hash:hex()
    end

    local privacy_prefix = settings[prefix .. '_privacy_prefix']
    if privacy_prefix and #privacy_prefix > 0 then
      subject = privacy_prefix .. ':' .. subject
    end
  end

  return subject
end

---[[[
-- @function lua_util.callback_from_string(str)
-- Converts a string like `return function(...) end` to lua function and return true and this function
-- or returns false + error message
-- @return status code and function object or an error message
--]]]
exports.callback_from_string = function(s)
  local loadstring = loadstring or load

  if not s or #s == 0 then
    return false,'invalid or empty string'
  end

  s = exports.rspamd_str_trim(s)
  local inp

  if s:match('^return%s*function') then
    -- 'return function', can be evaluated directly
    inp = s
  elseif s:match('^function%s*%(') then
    inp = 'return ' .. s
  else
    -- Just a plain sequence
    inp = 'return function(...)\n' .. s .. '; end'
  end

  local ret, res_or_err = pcall(loadstring(inp))

  if not ret or type(res_or_err) ~= 'function' then
    return false,res_or_err
  end

  return ret,res_or_err
end

---[[[
-- @function lua_util.keys(t)
-- Returns all keys from a specific table
-- @param {table} t input table (or iterator triplet)
-- @return array of keys
--]]]
exports.keys = function(gen, param, state)
  local keys = {}
  local i = 1

  if param then
    for k,_ in fun.iter(gen, param, state) do
      rawset(keys, i, k)
      i = i + 1
    end
  else
    for k,_ in pairs(gen) do
      rawset(keys, i, k)
      i = i + 1
    end
  end

  return keys
end

---[[[
-- @function lua_util.values(t)
-- Returns all values from a specific table
-- @param {table} t input table
-- @return array of values
--]]]
exports.values = function(gen, param, state)
  local values = {}
  local i = 1

  if param then
    for _,v in fun.iter(gen, param, state) do
      rawset(values, i, v)
      i = i + 1
    end
  else
    for _,v in pairs(gen) do
      rawset(values, i, v)
      i = i + 1
    end
  end

  return values
end

---[[[
-- @function lua_util.distance_sorted(t1, t2)
-- Returns distance between two sorted tables t1 and t2
-- @param {table} t1 input table
-- @param {table} t2 input table
-- @return distance between `t1` and `t2`
--]]]
exports.distance_sorted = function(t1, t2)
  local ncomp = #t1
  local ndiff = 0
  local i,j = 1,1

  if ncomp < #t2 then
    ncomp = #t2
  end

  for _=1,ncomp do
    if j > #t2 then
      ndiff = ndiff + ncomp - #t2
      if i > j then
        ndiff = ndiff - (i - j)
      end
      break
    elseif i > #t1 then
      ndiff = ndiff + ncomp - #t1
      if j > i then
        ndiff = ndiff - (j - i)
      end
      break
    end

    if t1[i] == t2[j] then
      i = i + 1
      j = j + 1
    elseif t1[i] < t2[j] then
      i = i + 1
      ndiff = ndiff + 1
    else
      j = j + 1
      ndiff = ndiff + 1
    end
  end

  return ndiff
end

---[[[
-- @function lua_util.table_digest(t)
-- Returns hash of all values if t[1] is string or all keys otherwise
-- @param {table} t input array or map
-- @return {string} base32 representation of blake2b hash of all strings
--]]]
exports.table_digest = function(t)
  local cr = require "rspamd_cryptobox_hash"
  local h = cr.create()

  if t[1] then
    for _,e in ipairs(t) do
      h:update(tostring(e))
    end
  else
    for k,_ in pairs(t) do
      h:update(k)
    end
  end
 return h:base32()
end

---[[[
-- @function lua_util.toboolean(v)
-- Converts a string or a number to boolean
-- @param {string|number} v
-- @return {boolean} v converted to boolean
--]]]
exports.toboolean = function(v)
  local true_t = {
    ['1'] = true,
    ['true'] = true,
    ['TRUE'] = true,
    ['True'] = true,
  };
  local false_t = {
    ['0'] = false,
    ['false'] = false,
    ['FALSE'] = false,
    ['False'] = false,
  };

  if type(v) == 'string' then
    if true_t[v] == true then
      return true;
    elseif false_t[v] == false then
      return false;
    else
      return false, string.format( 'cannot convert %q to boolean', v);
    end
  elseif type(v) == 'number' then
    return (not (v == 0))
  else
    return false, string.format( 'cannot convert %q to boolean', v);
  end
end

return exports