You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lua_urls_compose.lua 7.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. --[[
  2. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[[
  14. -- @module lua_urls_compose
  15. -- This module contains functions to compose urls queries from hostname
  16. -- to TLD part
  17. --]]
  18. local N = "lua_urls_compose"
  19. local lua_util = require "lua_util"
  20. local rspamd_util = require "rspamd_util"
  21. local bit = require "bit"
  22. local rspamd_trie = require "rspamd_trie"
  23. local fun = require "fun"
  24. local rspamd_regexp = require "rspamd_regexp"
  25. local maps_cache = {}
  26. local exports = {}
  -- Maps a URL host to its composed name using the rules stored for its TLD.
  --
  -- @param self composition object; `self.tlds` is looked up by `url_tld`
  -- @param log_obj object forwarded to `lua_util.debugm` for debug logging
  -- @param url_tld TLD part of the URL; also the fallback return value
  -- @param url_host host name that the rules are applied to
  -- @return value produced by the first decisive rule, otherwise `url_tld`
  local function process_url(self, log_obj, url_tld, url_host)
    local rules = self.tlds[url_tld]

    if not rules then
      lua_util.debugm(N, log_obj, 'not found compose tld for %s in %s -> %s',
          url_host, url_tld, url_tld)
      return url_tld
    end

    lua_util.debugm(N, log_obj, 'found compose tld for %s (host = %s)',
        url_tld, url_host)

    -- Exclusions are consulted first: the first one that matches decides
    for _, exclusion in ipairs(rules.except_rules) do
      local hit, composed = exclusion[2](url_tld, url_host)
      if hit then
        lua_util.debugm(N, log_obj, 'found compose exclusion for %s (%s) -> %s',
            url_host, exclusion[1], composed)
        return composed
      end
    end

    local multipattern = rules.multipattern_compose_rules

    if multipattern then
      local found = multipattern:match(url_host)

      if found then
        -- Take the smallest key reported by the multipattern; keys are used
        -- below as indexes into `compose_rules`
        local best_idx = math.huge
        for idx in pairs(found) do
          if idx < best_idx then
            best_idx = idx
          end
        end

        if best_idx > #rules.compose_rules then
          -- Nothing usable was reported (or the index is out of range)
          lua_util.debugm(N, log_obj, 'NOT found compose inclusion for %s (%s) -> %s',
              url_host, best_idx, url_tld)
          return url_tld
        end

        -- Confirm the candidate with its own matcher, which also yields
        -- the composed value
        local rule = rules.compose_rules[best_idx]
        local hit, composed = rule[2](url_tld, url_host)

        if hit then
          lua_util.debugm(N, log_obj, 'found compose inclusion for %s (%s) -> %s',
              url_host, rule[1], composed)
          return composed
        end

        lua_util.debugm(N, log_obj, 'NOT found compose inclusion for %s (%s) -> %s',
            url_host, rule[1], url_tld)
        return url_tld
      end
    else
      -- No multipattern available: try the inclusion rules one by one
      for _, rule in ipairs(rules.compose_rules) do
        local hit, composed = rule[2](url_tld, url_host)
        if hit then
          lua_util.debugm(N, log_obj, 'found compose inclusion for %s (%s) -> %s',
              url_host, rule[1], composed)
          return composed
        end
      end
    end

    lua_util.debugm(N, log_obj, 'not found compose inclusion for %s in %s -> %s',
        url_host, url_tld, url_tld)
    return url_tld
  end
  -- Converts a TLD-like rule from the compose map into the source of a
  -- regular expression (used both for rspamd_regexp and for the multipattern).
  --
  --   blah   -> (?:^|\.)((?:[^.]+\.)?blah)$   (the rule plus one optional label)
  --   *.blah -> ^((?:[^.]+\.)*blah)$          (the rule plus any number of labels)
  --
  -- The rule text is matched literally: regexp metacharacters in it (most
  -- importantly the dots separating labels) are escaped, so `example.com`
  -- can no longer match `exampleXcom`. Only a leading `*.` is treated as a
  -- wildcard.
  --
  -- @param tld_pat rule text, optionally starting with `*.`
  -- @return regular expression source string
  local function tld_pattern_transform(tld_pat)
    -- Backslash-escape everything that is special in a regexp; the extra
    -- parentheses drop the replacement count returned by gsub
    local function quote_literal(s)
      return (s:gsub('[%.%^%$%*%+%?%(%)%[%]{}|\\]', '\\%0'))
    end

    local ret
    if tld_pat:sub(1, 2) == '*.' then
      ret = string.format('^((?:[^.]+\\.)*%s)$', quote_literal(tld_pat:sub(3)))
    else
      ret = string.format('(?:^|\\.)((?:[^.]+\\.)?%s)$', quote_literal(tld_pat))
    end

    lua_util.debugm(N, nil, 'added pattern %s -> %s',
        tld_pat, ret)

    return ret
  end
  -- Builds the matcher for an inclusion (compose) rule.
  --
  -- The rule is compiled once into a regexp created with the 'i' flag; the
  -- returned closure is called as `matcher(tld, host)` and ignores its first
  -- argument.
  --
  -- @param pat rule text as accepted by `tld_pattern_transform`
  -- @return function returning `true, value` on a match, `false` otherwise;
  --         `value` is element 2 of the first search result (presumably the
  --         first capture group, since captures are requested -- confirm
  --         against the rspamd_regexp documentation)
  local function include_elt_gen(pat)
    local re = rspamd_regexp.create(tld_pattern_transform(pat), 'i')

    local function matcher(_, host)
      local found = re:search(host, false, true)

      if not found then
        return false
      end

      return true, found[1][2]
    end

    return matcher
  end
  -- Builds the matcher for an exclusion rule (a map line starting with `!`,
  -- passed here without the `!`).
  --
  -- The regexp is compiled with the 'i' flag so that exclusions are matched
  -- case-insensitively, in the same way as inclusion rules and the
  -- multipattern built from them.
  --
  -- @param pat rule text as accepted by `tld_pattern_transform`
  -- @return function `(tld, host)` returning `true, tld` when the host matches
  --         the exclusion (the URL is then left at its plain TLD), or `false`
  local function exclude_elt_gen(pat)
    local re = rspamd_regexp.create(tld_pattern_transform(pat), 'i')

    return function(tld, host)
      if re:search(host) then
        return true, tld
      end

      return false
    end
  end
  -- Parses the text of a composition map and rebuilds the per-TLD rule set
  -- stored in `self.tlds`.
  --
  -- Each non-empty line (after removing a `#` comment and surrounding
  -- whitespace) is one rule; a leading `!` marks an exclusion rule. Rules are
  -- grouped by the TLD reported by `rspamd_util.get_tld`.
  --
  -- The rule set is built in a fresh table and swapped into `self.tlds` only
  -- at the end, so calling this function again (e.g. when the map callback
  -- fires on reload) replaces the previous rules instead of appending
  -- duplicates to them.
  --
  -- @param self composition object whose `tlds` field is replaced
  -- @param map_text map content; must provide a `lines()` iterator (callers in
  --        this file pass the map callback input or the result of
  --        `rspamd_text.fromtable`)
  local function compose_map_cb(self, map_text)
    local lpeg = require "lpeg"

    local singleline_comment = lpeg.P '#' * (1 - lpeg.S '\r\n\f') ^ 0
    local comments_strip_grammar = lpeg.C((1 - lpeg.P '#') ^ 1) * lpeg.S(' \t') ^ 0 * singleline_comment ^ 0

    -- Rules collected during this invocation only
    local tlds = {}

    local function process_tld_rule(tld_elt, l)
      if l:sub(1, 1) == '!' then
        -- Exclusion elt
        table.insert(tld_elt.except_rules, { l, exclude_elt_gen(l:sub(2)) })
      else
        table.insert(tld_elt.compose_rules, { l, include_elt_gen(l) })
      end
    end

    local function process_map_line(l)
      -- Skip empty lines and comments
      if #l == 0 then
        return
      end

      l = comments_strip_grammar:match(l)
      if not l then
        return
      end

      -- The capture above takes everything before '#', including blanks that
      -- separate the rule from an inline comment; trim them so they end up
      -- neither in the TLD lookup nor in the generated pattern
      l = tostring(l):match('^%s*(.-)%s*$')
      if #l == 0 then
        return
      end

      -- Get TLD
      local tld = rspamd_util.get_tld(l)

      if tld then
        local tld_elt = tlds[tld]

        if not tld_elt then
          tld_elt = {
            compose_rules = {},
            except_rules = {},
            multipattern_compose_rules = nil
          }

          lua_util.debugm(N, rspamd_config, 'processed new tld rule for %s', tld)
          tlds[tld] = tld_elt
        end

        process_tld_rule(tld_elt, l)
      else
        lua_util.debugm(N, rspamd_config, 'cannot read tld from compose map line: %s', l)
      end
    end

    for line in map_text:lines() do
      process_map_line(line)
    end

    local multipattern_threshold = 1

    for tld, tld_elt in pairs(tlds) do
      -- Sort patterns to have longest labels before shortest ones,
      -- so we can ensure that they match before
      table.sort(tld_elt.compose_rules, function(e1, e2)
        local _, ndots1 = string.gsub(e1[1], '(%.)', '')
        local _, ndots2 = string.gsub(e2[1], '(%.)', '')

        return ndots1 > ndots2
      end)

      if rspamd_trie.has_hyperscan() and #tld_elt.compose_rules >= multipattern_threshold then
        lua_util.debugm(N, rspamd_config, 'tld %s has %s rules, apply multipattern',
            tld, #tld_elt.compose_rules)

        local flags = bit.bor(rspamd_trie.flags.re,
            rspamd_trie.flags.dot_all,
            rspamd_trie.flags.no_start,
            rspamd_trie.flags.icase)

        -- We now convert our internal patterns to multipattern patterns;
        -- the order is kept, so a multipattern index is also an index into
        -- `compose_rules`
        local mp_table = fun.totable(fun.map(function(pat_elt)
          return tld_pattern_transform(pat_elt[1])
        end, tld_elt.compose_rules))

        tld_elt.multipattern_compose_rules = rspamd_trie.create(mp_table, flags)
      end
    end

    -- Publish the completed rule set in one step
    self.tlds = tlds
  end
  -- Registers a URL compose map and returns the composition object bound to it.
  --
  -- Objects are cached in `maps_cache`: the key is `map_obj` itself, or its
  -- `lua_util.table_digest` when `map_obj` is a table, so asking for the same
  -- map again returns the already created object.
  --
  -- @param cfg configuration object providing `add_map`
  -- @param map_obj map definition passed to `cfg:add_map` as `url`
  -- @return table with `process_url`, `hash`, `tlds` and `map` fields
  exports.add_composition_map = function(cfg, map_obj)
    local hash_key
    if type(map_obj) == 'table' then
      hash_key = lua_util.table_digest(map_obj)
    else
      hash_key = map_obj
    end

    local cached = maps_cache[hash_key]
    if cached then
      return cached
    end

    local composer = {
      process_url = process_url,
      hash = hash_key,
      tlds = {},
    }

    -- Map content is handed to compose_map_cb, which fills `composer.tlds`
    local function on_map_data(input)
      compose_map_cb(composer, input)
    end

    composer.map = cfg:add_map {
      type = 'callback',
      description = 'URL compose map',
      url = map_obj,
      callback = on_map_data,
      opaque_data = true,
    }

    maps_cache[hash_key] = composer

    return composer
  end
  -- Creates a composition object directly from a set of rules, without
  -- registering a map.
  --
  -- The result is cached in `maps_cache` under `rules` itself, or under its
  -- `lua_util.table_digest` when `rules` is a table.
  --
  -- @param cfg unused by this function
  -- @param rules rule lines; joined with '\n' via `rspamd_text.fromtable`
  -- @return table with `process_url`, `hash` and `tlds` fields
  exports.inject_composition_rules = function(cfg, rules)
    local rspamd_text = require "rspamd_text"

    local hash_key
    if type(rules) == 'table' then
      hash_key = lua_util.table_digest(rules)
    else
      hash_key = rules
    end

    local cached = maps_cache[hash_key]
    if cached then
      return cached
    end

    local composer = {
      process_url = process_url,
      hash = hash_key,
      tlds = {},
    }

    -- Parse the rules right away, exactly as map content would be parsed
    compose_map_cb(composer, rspamd_text.fromtable(rules, '\n'))
    maps_cache[hash_key] = composer

    return composer
  end
  231. return exports