You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

url_tags.lua 7.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. --[[
  2. Copyright (c) 2016-2017, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Copyright (c) 2017, Andrew Lewis <nerf@judo.za.org>
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. ]]--
  -- Skip the whole module when the `confighelp` global is set.
  if confighelp then return end

  -- url_tags: a plugin that persists URL tags and restores them later.
  local N = 'url_tags'

  -- Module-level state; both start out as nil and are filled in further down
  -- in this file (Redis connection parameters and the id of the loaded script).
  local redis_params
  local redis_set_script_id

  -- Default settings.
  local settings = {
    -- tags in this list are not persisted
    ignore_tags = {},
    -- prefix for redis keys
    key_prefix = 'Ut.',
    -- lifetime for tags, in seconds (1 hour)
    expire = 60 * 60,
  }
  28. local rspamd_logger = require "rspamd_logger"
  29. local rspamd_util = require "rspamd_util"
  30. local lua_util = require "lua_util"
  31. local lua_redis = require "lua_redis"
  32. local ucl = require "ucl"
  -- Source of the Redis-side "set" script, split in two so that the expiry
  -- value can be spliced in between (head .. <expire> .. '\n' .. tail).
  --
  -- Each Redis key holds '/'-separated entries of the form
  --   <timestamp>|<tag>|<meta1>,<meta2>,...
  -- The script expects KEYS[i] to be a key and ARGV[i] the matching new data
  -- as '/'-separated `<tag>|<meta1>,<meta2>,...` entries; the last ARGV
  -- element is the current time. For every key it keeps the stored entries
  -- that have not expired, adds/overwrites the new ones stamped with the
  -- current time, and writes the result back with SETEX.
  --
  -- Entries that do not match the expected format are skipped instead of
  -- raising an error (which would abort the script for every key), and
  -- entries without meta values are not written because they could not be
  -- parsed back.
  local redis_set_script_head = 'local expiry = '
  local redis_set_script_tail = [[
  local now = math.floor(tonumber(table.remove(ARGV)))
  if #KEYS == 0 then
    return
  end
  local res = redis.call('MGET', unpack(KEYS))
  for i = 1, #KEYS do
    local which = KEYS[i]
    local entries = {}
    if type(res[i]) == 'string' then
      for goo in string.gmatch(res[i], '[^/]+') do
        local time, tag, meta = string.match(goo, '(%d+)|([^|]+)|(.+)')
        if time and (tonumber(time) + expiry) > now then
          local metatags = {}
          for m in string.gmatch(meta, '[^,]+') do
            metatags[m] = true
          end
          entries[tag] = {time, metatags}
        end
      end
    end
    for goo in string.gmatch(ARGV[i] or '', '[^/]+') do
      local tag, meta = string.match(goo, '([^|]+)|(.+)')
      if tag then
        local metatags = {}
        for m in string.gmatch(meta, '[^,]+') do
          metatags[m] = true
        end
        entries[tag] = {now, metatags}
      end
    end
    local serialized = {}
    for tag, entry in pairs(entries) do
      local meta_list = {}
      for m in pairs(entry[2]) do
        table.insert(meta_list, m)
      end
      if #meta_list > 0 then
        table.insert(serialized, entry[1] .. '|' .. tag .. '|' .. table.concat(meta_list, ','))
      end
    end
    if #serialized > 0 then
      redis.call('SETEX', which, expiry, table.concat(serialized, '/'))
    end
  end
  ]]
  -- Assembles the Redis "set" script and registers it.
  -- The script text is the head fragment, the configured `settings.expire`,
  -- a newline and the tail fragment; the value returned by
  -- lua_redis.add_redis_script is kept in `redis_set_script_id`.
  -- Both parameters are accepted but not used by this function.
  local function load_scripts(cfg, ev_base)
    local expiry_line = redis_set_script_head .. settings.expire
    local script_source = expiry_line .. '\n' .. redis_set_script_tail
    redis_set_script_id = lua_redis.add_redis_script(script_source, redis_params)
  end
  -- Saves tags to redis.
  --
  -- Collects the tags of every URL of the task grouped by TLD, drops the tags
  -- listed in `settings.ignore_tags` and the values recorded in the 'urltags'
  -- mempool variable, and sends what is left to Redis through the registered
  -- set script (one key per TLD, plus the current time as the last argument).
  --
  -- @param task the task being processed (used for get_urls/get_mempool and
  --             as the logging/Redis context)
  -- Returns nothing; returns early when there is nothing to store or when the
  -- 'urltags' variable cannot be parsed.
  local function tags_save(task)
    local tags = {}

    -- Figure out what tags are present for each TLD. Containers are created
    -- lazily so that a TLD or tag without any value never reaches Redis
    -- (an empty entry would be serialized as `tag|`, which the stored format
    -- cannot represent).
    for _, url in ipairs(task:get_urls(false)) do
      local utags = url:get_tags()
      if utags and next(utags) then
        local tld = url:get_tld()
        for ut, utv in pairs(utags) do
          if not settings.ignore_tags[ut] then
            for _, e in ipairs(utv) do
              local tld_tags = tags[tld]
              if not tld_tags then
                tld_tags = {}
                tags[tld] = tld_tags
              end
              local values = tld_tags[ut]
              if not values then
                values = {}
                tld_tags[ut] = values
              end
              values[e] = true
            end
          end
        end
      end
    end
    if not next(tags) then
      return
    end

    -- Don't populate old tags
    local old_tags = task:get_mempool():get_variable('urltags')
    if old_tags then
      local parser = ucl.parser()
      local res, err = parser:parse_string(old_tags)
      if not res then
        rspamd_logger.errx(task, 'Parser error: %s', err)
        return
      end
      local obj = parser:get_object()
      if type(obj) == 'table' then
        for dom, domtags in pairs(obj) do
          local cur_dom = tags[dom]
          if cur_dom and type(domtags) == 'table' then
            for tag, mtags in pairs(domtags) do
              -- The recorded tag may be absent from the current set; indexing
              -- it unconditionally would raise an error.
              local cur_tag = cur_dom[tag]
              if cur_tag then
                if type(mtags) == 'table' then
                  -- NOTE(review): tags_restore records the base32-encoded
                  -- values in 'urltags', while the keys here are the raw
                  -- values returned by url:get_tags() - confirm that these
                  -- are meant to be comparable.
                  for mtag in pairs(mtags) do
                    cur_tag[mtag] = nil
                  end
                end
                if not next(cur_tag) then
                  cur_dom[tag] = nil
                end
              end
            end
            if not next(cur_dom) then
              tags[dom] = nil
            end
          end
        end
      end
    end

    -- Abort if no tags remaining
    if not next(tags) then
      return
    end

    -- Prepare arguments to send to Redis: redis_keys[i] is the key of a TLD
    -- and redis_args[i] its `tag|v1,v2/tag2|v3` payload (values are base32
    -- encoded).
    local redis_keys = {}
    local redis_args = {}
    for dom, domtags in pairs(tags) do
      local parts = {}
      for tag, mtags in pairs(domtags) do
        local encoded = {}
        for k in pairs(mtags) do
          encoded[#encoded + 1] = tostring(rspamd_util.encode_base32(k))
        end
        if #encoded > 0 then
          parts[#parts + 1] = tag .. '|' .. table.concat(encoded, ',')
        end
      end
      if #parts > 0 then
        redis_keys[#redis_keys + 1] = settings.key_prefix .. dom
        redis_args[#redis_args + 1] = table.concat(parts, '/')
      end
    end
    if #redis_keys == 0 then
      return
    end
    -- The script pops the current time from the end of its arguments
    redis_args[#redis_args + 1] = rspamd_util.get_time()

    -- Report failures instead of silently dropping them
    local function redis_set_cb(err)
      if err then
        rspamd_logger.errx(task, 'Redis error: %s', err)
      end
    end

    -- Send query to redis
    lua_redis.exec_redis_script(
      redis_set_script_id,
      {task = task, is_write = true},
      redis_set_cb, redis_keys, redis_args)
  end
  -- Restores previously saved tags onto the URLs of a task.
  --
  -- Groups the task's URLs by TLD, fetches the stored entries of every TLD
  -- with a single MGET and, for each entry that is neither expired nor listed
  -- in `settings.ignore_tags`, adds the decoded values as tags to all URLs of
  -- that TLD. What was restored is recorded in the 'urltags' mempool variable
  -- (UCL text), which tags_save reads back.
  --
  -- @param task the task being processed
  -- Returns nothing; the Redis reply is handled asynchronously in a callback.
  local function tags_restore(task)
    local urls
    local tlds = {}      -- tld -> list of indexes into `urls`
    local key_tlds = {}  -- position in the MGET key list -> tld
    local mpool = task:get_mempool()

    local function redis_get_cb(err, data)
      if err then
        rspamd_logger.errx(task, 'Redis error: %s', err)
        return
      end
      if type(data) ~= 'table' then return end
      local d_len = #data
      if d_len == 0 then return end
      local now = rspamd_util.get_time()
      local tracking = {}
      for i = 1, d_len do
        -- data[i] is the reply for the i-th requested key, so the TLD has to
        -- be looked up by key position, not by URL position.
        local tld = key_tlds[i]
        local url_idxs = tld and tlds[tld]
        if url_idxs and type(data[i]) == 'string' then
          for goo in string.gmatch(data[i], '[^/]+') do
            local time, tag, meta = string.match(goo, '(%d+)|([^|]+)|(.+)')
            if time and not settings.ignore_tags[tag]
                and (tonumber(time) + settings.expire) > now then
              for ttag in string.gmatch(meta, '[^,]+') do
                -- Decode once per value rather than once per URL
                local decoded = tostring(rspamd_util.decode_base32(ttag))
                for _, idx in ipairs(url_idxs) do
                  urls[idx]:add_tag(tag, decoded, mpool)
                end
                if not tracking[tld] then
                  tracking[tld] = {}
                end
                if not tracking[tld][tag] then
                  tracking[tld][tag] = {}
                end
                -- The encoded form is what gets recorded
                tracking[tld][tag][ttag] = true
              end
            end
          end
        end
      end
      mpool:set_variable('urltags', ucl.to_format(tracking, 'ucl'))
    end

    urls = task:get_urls(false)
    for idx = 1, #urls do
      local tld = urls[idx]:get_tld()
      if tld then
        local idxs = tlds[tld]
        if not idxs then
          idxs = {}
          tlds[tld] = idxs
        end
        idxs[#idxs + 1] = idx
      end
    end

    local first = next(tlds)
    if first then
      local keys = {}
      for x in pairs(tlds) do
        local pos = #keys + 1
        keys[pos] = settings.key_prefix .. x
        key_tlds[pos] = x
      end
      rspamd_redis_make_request(task,
        redis_params,
        first, -- NOTE(review): presumably the hash key for server selection
        false, -- is write
        redis_get_cb, --callback
        'MGET', -- command
        keys
      )
    end
  end
  -- Module setup: runs once when the file is loaded.

  -- Stop unless lua_util.check_experimental accepts this module name.
  if not lua_util.check_experimental(N) then return end

  -- No configuration section for the module: nothing to do.
  local opts = rspamd_config:get_all_opt(N)
  if not opts then
    return
  end

  -- Redis is mandatory; disable the module when no server is configured.
  redis_params = rspamd_parse_redis_server(N)
  if not redis_params then
    lua_util.disable_module(N, "redis")
    rspamd_logger.warnx(rspamd_config, 'no servers are specified, disabling module')
    return
  end

  -- Configured options override the defaults key by key.
  for name, value in pairs(opts) do
    settings[name] = value
  end
  -- Convert the ignore list with lua_util.list_to_hash; the rest of the file
  -- looks tags up by name (`settings.ignore_tags[tag]`).
  settings.ignore_tags = lua_util.list_to_hash(settings.ignore_tags)

  -- Register the Redis script from the on-load hook.
  rspamd_config:add_on_load(function(cfg, ev_base, _worker)
    load_scripts(cfg, ev_base)
  end)

  -- Saving runs as a postfilter, restoring as a prefilter.
  local save_symbol = {
    name = 'URL_TAGS_SAVE',
    type = 'postfilter',
    callback = tags_save,
    priority = 10
  }
  local restore_symbol = {
    name = 'URL_TAGS_RESTORE',
    type = 'prefilter',
    callback = tags_restore,
    priority = 5
  }
  rspamd_config:register_symbol(save_symbol)
  rspamd_config:register_symbol(restore_symbol)