You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_urls_compose.lua 7.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. --[[
  2. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[[
  14. -- @module lua_urls_compose
  15. -- This module contains functions to compose URL queries, from the hostname
  16. -- down to the TLD part
  17. --]]
  18. local N = "lua_urls_compose"
  19. local lua_util = require "lua_util"
  20. local rspamd_util = require "rspamd_util"
  21. local bit = require "bit"
  22. local rspamd_trie = require "rspamd_trie"
  23. local fun = require "fun"
  24. local rspamd_regexp = require "rspamd_regexp"
  25. local maps_cache = {}
  26. local exports = {}
  --- Pick the composed part of a URL host according to the rules known for its TLD.
  -- Exclusion rules are tried first and win as soon as one matches. After that
  -- the inclusion rules are consulted, either through the prebuilt multipattern
  -- (when present) or one by one in their stored order.
  -- @param self rules object; only its `tlds` table is read here
  -- @param log_obj object handed to `lua_util.debugm` for debug logging
  -- @param url_tld TLD part of the URL, used as the key into `self.tlds`
  -- @param url_host hostname the rules are matched against
  -- @return the value produced by the first matching rule, or `url_tld` itself
  -- when nothing applies
  local function process_url(self, log_obj, url_tld, url_host)
    local rules = self.tlds[url_tld]

    -- No rules were registered for this TLD at all
    if not rules then
      lua_util.debugm(N, log_obj, 'not found compose tld for %s in %s -> %s',
          url_host, url_tld, url_tld)
      return url_tld
    end

    lua_util.debugm(N, log_obj, 'found compose tld for %s (host = %s)',
        url_tld, url_host)

    -- Exclusions take priority over any inclusion rule
    for _, exclusion in ipairs(rules.except_rules) do
      local hit, result = exclusion[2](url_tld, url_host)
      if hit then
        lua_util.debugm(N, log_obj, 'found compose exclusion for %s (%s) -> %s',
            url_host, exclusion[1], result)
        return result
      end
    end

    local multipattern = rules.multipattern_compose_rules
    if multipattern then
      local hits = multipattern:match(url_host)
      if hits then
        -- Rules are kept sorted, so the smallest matched index is the preferred one
        local best_idx = math.huge
        for idx in pairs(hits) do
          if idx < best_idx then
            best_idx = idx
          end
        end

        if best_idx > #rules.compose_rules then
          lua_util.debugm(N, log_obj, 'NOT found compose inclusion for %s (%s) -> %s',
              url_host, best_idx, url_tld)
          return url_tld
        end

        -- Re-run the rule's own matcher to obtain the captured part
        local rule = rules.compose_rules[best_idx]
        local hit, result = rule[2](url_tld, url_host)
        if hit then
          lua_util.debugm(N, log_obj, 'found compose inclusion for %s (%s) -> %s',
              url_host, rule[1], result)
          return result
        end

        lua_util.debugm(N, log_obj, 'NOT found compose inclusion for %s (%s) -> %s',
            url_host, rule[1], url_tld)
        return url_tld
      end
    else
      -- No multipattern available: match one by one
      for _, rule in ipairs(rules.compose_rules) do
        local hit, result = rule[2](url_tld, url_host)
        if hit then
          lua_util.debugm(N, log_obj, 'found compose inclusion for %s (%s) -> %s',
              url_host, rule[1], result)
          return result
        end
      end
    end

    lua_util.debugm(N, log_obj, 'not found compose inclusion for %s in %s -> %s',
        url_host, url_tld, url_tld)
    return url_tld
  end
  --- Convert a TLD-like map pattern into a regular expression source string.
  --   blah   -> (?:^|\.)((?:[^.]+\.)?blah)$   (blah plus at most one extra label)
  --   *.blah -> ^((?:[^.]+\.)*blah)$          (blah plus any number of labels)
  -- The pattern text is treated as a literal hostname: regexp metacharacters in
  -- it (most importantly the dots between labels) are escaped, so that a rule
  -- such as `a.b.example.com` cannot match `aXb.example.com`.
  -- @param tld_pat pattern string as written in the map (without a leading `!`)
  -- @return regexp source string with one capture group holding the composed part
  local function tld_pattern_transform(tld_pat)
    -- Backslash-escape everything that is special in a regexp outside of a
    -- character class; the extra parentheses drop gsub's second return value
    local function quote_literal(s)
      return (s:gsub('[%^%$%(%)%.%[%]%*%+%?%{%}%|\\]', '\\%0'))
    end

    local ret
    if tld_pat:sub(1, 2) == '*.' then
      ret = string.format('^((?:[^.]+\\.)*%s)$', quote_literal(tld_pat:sub(3)))
    else
      ret = string.format('(?:^|\\.)((?:[^.]+\\.)?%s)$', quote_literal(tld_pat))
    end
    lua_util.debugm(N, nil, 'added pattern %s -> %s',
        tld_pat, ret)
    return ret
  end
  --- Build the matcher for an inclusion rule.
  -- The map pattern is compiled once into a case-insensitive regexp; the
  -- returned closure reuses it for every host.
  -- @param pat inclusion pattern as written in the map
  -- @return function(tld, host) returning `true, <first capture>` when the host
  -- matches and `false` otherwise (the tld argument is ignored)
  local function include_elt_gen(pat)
    local re = rspamd_regexp.create(tld_pattern_transform(pat), 'i')

    return function(_, host)
      local captures = re:search(host, false, true)
      if not captures then
        return false
      end
      -- First match, second element: the capture group of the generated regexp
      return true, captures[1][2]
    end
  end
  --- Build the matcher for an exclusion rule (a map line starting with `!`).
  -- The regexp is compiled case-insensitively, the same way inclusion rules and
  -- the multipattern are, so that an exclusion cannot be bypassed merely by a
  -- difference in letter case between the rule and the host.
  -- @param pat exclusion pattern with the leading `!` already removed
  -- @return function(tld, host) returning `true, tld` when the host is excluded
  -- (the tld is returned unchanged) and `false` otherwise
  local function exclude_elt_gen(pat)
    pat = rspamd_regexp.create(tld_pattern_transform(pat), 'i')
    return function(tld, host)
      if pat:search(host) then
        return true,tld
      end
      return false
    end
  end
  --- Parse the text of a compose map and (re)build the per-TLD rule tables.
  -- Each non-empty, non-comment line is a rule; a leading `!` marks an exclusion.
  -- Rules are grouped by the TLD reported by `rspamd_util.get_tld`, inclusion
  -- rules are sorted so that patterns with more dots come first, and a
  -- multipattern is built per TLD when `rspamd_trie.has_hyperscan()` is true.
  -- The rule set is built in a fresh table and stored into `self.tlds` only at
  -- the end, so calling this again for the same object replaces the previous
  -- rules instead of appending duplicates to them.
  -- @param self rules object whose `tlds` field is replaced
  -- @param map_text map content; must provide a `lines()` iterator
  local function compose_map_cb(self, map_text)
    local lpeg = require "lpeg"

    local singleline_comment = lpeg.P '#' * (1 - lpeg.S '\r\n\f') ^ 0
    local comments_strip_grammar = lpeg.C((1 - lpeg.P '#') ^ 1) * lpeg.S(' \t')^0 * singleline_comment^0

    -- Fresh rule set for this load; published to self.tlds once complete
    local tlds = {}

    local function process_tld_rule(tld_elt, l)
      if l:sub(1, 1) == '!' then
        -- Exclusion elt
        table.insert(tld_elt.except_rules, {l, exclude_elt_gen(l:sub(2))})
      else
        table.insert(tld_elt.compose_rules, {l, include_elt_gen(l)})
      end
    end

    local function process_map_line(l)
      -- Skip empty lines and comments
      if #l == 0 then return end
      l = comments_strip_grammar:match(l)
      if not l then return end
      -- The capture above is greedy and keeps any whitespace that precedes an
      -- inline comment (or surrounds the rule), which would end up inside the
      -- generated regexp; strip it here
      l = l:match('^%s*(.-)%s*$')
      if #l == 0 then return end

      -- Get TLD
      local tld = rspamd_util.get_tld(l)

      if tld then
        local tld_elt = tlds[tld]

        if not tld_elt then
          tld_elt = {
            compose_rules = {},
            except_rules = {},
            multipattern_compose_rules = nil
          }

          lua_util.debugm(N, rspamd_config, 'processed new tld rule for %s', tld)
          tlds[tld] = tld_elt
        end

        process_tld_rule(tld_elt, l)
      else
        lua_util.debugm(N, rspamd_config, 'cannot read tld from compose map line: %s', l)
      end
    end

    for line in map_text:lines() do
      process_map_line(line)
    end

    local multipattern_threshold = 1
    for tld,tld_elt in pairs(tlds) do
      -- Sort patterns to have longest labels before shortest ones,
      -- so we can ensure that they match before
      table.sort(tld_elt.compose_rules, function(e1, e2)
        local _,ndots1 = string.gsub(e1[1], '(%.)', '')
        local _,ndots2 = string.gsub(e2[1], '(%.)', '')

        return ndots1 > ndots2
      end)

      if rspamd_trie.has_hyperscan() and #tld_elt.compose_rules >= multipattern_threshold then
        lua_util.debugm(N, rspamd_config, 'tld %s has %s rules, apply multipattern',
            tld, #tld_elt.compose_rules)
        local flags = bit.bor(rspamd_trie.flags.re,
            rspamd_trie.flags.dot_all,
            rspamd_trie.flags.no_start,
            rspamd_trie.flags.icase)

        -- We now convert our internal patterns to multipattern patterns;
        -- the order matches compose_rules, so a multipattern match index can be
        -- used directly as an index into compose_rules
        local mp_table = fun.totable(fun.map(function(pat_elt)
          return tld_pattern_transform(pat_elt[1])
        end, tld_elt.compose_rules))
        tld_elt.multipattern_compose_rules = rspamd_trie.create(mp_table, flags)
      end
    end

    -- Publish the complete rule set in one step
    self.tlds = tlds
  end
  --- Register a compose map with the configuration and return its rules object.
  -- Rules objects are cached per map definition (a table definition is keyed by
  -- its digest), so asking for the same map again returns the cached object.
  -- @param cfg configuration object providing `add_map`
  -- @param map_obj map definition passed as the `url` of the new map
  -- @return rules object with `process_url`, `hash`, `tlds` and `map` fields
  exports.add_composition_map = function(cfg, map_obj)
    local hash_key
    if type(map_obj) == 'table' then
      hash_key = lua_util.table_digest(map_obj)
    else
      hash_key = map_obj
    end

    local cached = maps_cache[hash_key]
    if cached then
      return cached
    end

    local composer = {
      tlds = {},
      hash = hash_key,
      process_url = process_url,
    }

    -- The callback fills `composer` with the rules parsed from the map data
    composer.map = cfg:add_map({
      type = 'callback',
      description = 'URL compose map',
      url = map_obj,
      callback = function(input)
        compose_map_cb(composer, input)
      end,
      opaque_data = true,
    })

    maps_cache[hash_key] = composer
    return composer
  end
  --- Build a rules object directly from a list of rules, without a map.
  -- The rules are joined with newlines and parsed the same way as map content.
  -- Results are cached (a table of rules is keyed by its digest), so injecting
  -- the same rules again returns the cached object.
  -- @param cfg configuration object (not used by this function)
  -- @param rules rules to inject, passed to `rspamd_text.fromtable`
  -- @return rules object with `process_url`, `hash` and `tlds` fields
  exports.inject_composition_rules = function(cfg, rules)
    local rspamd_text = require "rspamd_text"

    local hash_key
    if type(rules) == 'table' then
      hash_key = lua_util.table_digest(rules)
    else
      hash_key = rules
    end

    local cached = maps_cache[hash_key]
    if cached then
      return cached
    end

    local composer = {
      tlds = {},
      hash = hash_key,
      process_url = process_url,
    }

    compose_map_cb(composer, rspamd_text.fromtable(rules, '\n'))
    maps_cache[hash_key] = composer
    return composer
  end
  225. return exports