You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

trie.lua 4.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. --[[
  2. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. if confighelp then
  14. return
  15. end
  16. -- Trie is an rspamd module designed to define and operate with suffix tries
  17. local N = 'trie'
  18. local rspamd_logger = require "rspamd_logger"
  19. local rspamd_trie = require "rspamd_trie"
  20. local fun = require "fun"
  21. local lua_util = require "lua_util"
  -- Compiled trie handles, one per search kind; declared empty (nil) here
  local mime_trie, raw_trie, body_trie

  -- Pattern texts, one list per search kind
  local mime_patterns, raw_patterns, body_patterns = {}, {}, {}

  -- Per-pattern settings: for every index i, <kind>_params[i] is expected to
  -- describe <kind>_patterns[i]
  local mime_params, raw_params, body_params = {}, {}, {}
  --- Symbol callback: runs every available trie over the task.
  -- For each reported match the symbol configured for the matched pattern is
  -- inserted with weight 1.0, passing the search kind as the third argument.
  -- Patterns not flagged as `multi` are inserted at most once per search kind
  -- within a single invocation.
  -- @param task task object being scanned
  local function tries_callback(task)
    -- keys of non-multi patterns that have already produced a result
    local seen = {}

    -- Builds the match handler for one search kind
    -- ('mime', 'rawmessage' or 'rawbody'); anything other than the two raw
    -- kinds uses the mime tables.
    local function make_match_handler(kind)
      local pats, settings_list
      if kind == 'rawmessage' then
        pats, settings_list = raw_patterns, raw_params
      elseif kind == 'rawbody' then
        pats, settings_list = body_patterns, body_params
      else
        pats, settings_list = mime_patterns, mime_params
      end

      return function(idx, pos)
        local settings = settings_list[idx]
        local text = pats[idx]
        -- the kind is part of the key because `seen` is shared by all kinds
        local key = text .. tostring(idx) .. kind
        local is_multi = settings['multi']

        if not is_multi and seen[key] then
          -- single-shot pattern that was already reported
          return
        end

        lua_util.debugm(N, task, "<%1> matched pattern %2 at pos %3",
            task:get_message_id(), text, pos)
        task:insert_result(settings['symbol'], 1.0, kind)

        if not is_multi then
          seen[key] = true
        end
      end
    end

    if mime_trie then
      mime_trie:search_mime(task, make_match_handler('mime'))
    end

    if raw_trie then
      raw_trie:search_rawmsg(task, make_match_handler('rawmessage'))
    end

    if body_trie then
      body_trie:search_rawbody(task, make_match_handler('rawbody'))
    end
  end
  --- Registers one pattern together with its settings.
  -- The pattern goes to the raw lists when `cf.raw` is set, to the body lists
  -- when `cf.body` is set (and `cf.raw` is not), and to the mime lists
  -- otherwise. A nil/false pattern is ignored.
  -- @param pat pattern text (may be nil)
  -- @param symbol symbol name stored with the pattern
  -- @param cf configuration table for the symbol
  local function process_single_pattern(pat, symbol, cf)
    if not pat then
      return
    end

    -- normalise the flag to a real boolean
    local entry = { symbol = symbol, multi = cf['multi'] and true or false }

    local pats, settings_list = mime_patterns, mime_params
    if cf['raw'] then
      pats, settings_list = raw_patterns, raw_params
    elseif cf['body'] then
      pats, settings_list = body_patterns, body_params
    end

    table.insert(pats, pat)
    table.insert(settings_list, entry)
  end
  --- Loads patterns for a symbol from the file named by `cf.file`.
  -- Each line is matched against '^([^#].*[^%s])%s*$': lines starting with
  -- '#' are skipped and trailing whitespace is stripped. Lines that do not
  -- match yield no pattern.
  -- NOTE(review): the match needs at least two characters, so a
  -- one-character line produces no pattern - confirm this is intended.
  -- Failure to open the file, or a `cf.binary` request (not implemented),
  -- is logged as an error and no patterns are added.
  -- @param symbol symbol name the patterns belong to
  -- @param cf configuration table for the symbol (reads `file`, `binary`)
  local function process_trie_file(symbol, cf)
    local file = io.open(cf['file'])

    if not file then
      rspamd_logger.errx(rspamd_config, 'Cannot open trie file %1', cf['file'])
      return
    end

    if cf['binary'] then
      rspamd_logger.errx(rspamd_config, 'binary trie patterns are not implemented yet: %1',
          cf['file'])
    else
      for line in file:lines() do
        local pat = string.match(line, '^([^#].*[^%s])%s*$')
        process_single_pattern(pat, symbol, cf)
      end
    end

    -- release the handle explicitly instead of waiting for the GC
    file:close()
  end
  --- Handles the configuration of one symbol.
  -- A table with `file` is read through process_trie_file; otherwise a table
  -- with `patterns` has each element passed to process_single_pattern. When
  -- both are present `file` wins. A non-table value is reported as an error.
  -- @param symbol symbol name
  -- @param cf configuration value for the symbol
  local function process_trie_conf(symbol, cf)
    if type(cf) == 'table' then
      if cf['file'] then
        process_trie_file(symbol, cf)
      else
        local inline = cf['patterns']
        if inline then
          fun.each(function(entry)
            process_single_pattern(entry, symbol, cf)
          end, inline)
        end
      end
    else
      rspamd_logger.errx(rspamd_config, 'invalid value for symbol %1: "%2", expected table',
          symbol, cf)
    end
  end
  -- Module entry point: read the "trie" section, build one trie per search
  -- kind that has patterns, and register the symbols.
  local opts = rspamd_config:get_all_opt("trie")

  if not opts then
    rspamd_logger.infox(rspamd_config, "Module is unconfigured")
    lua_util.disable_module(N, "config")
  else
    -- every key of the section is a symbol name, its value the symbol config
    for sym, opt in pairs(opts) do
      process_trie_conf(sym, opt)
    end

    local raw_count = #raw_patterns
    if raw_count > 0 then
      raw_trie = rspamd_trie.create(raw_patterns)
      rspamd_logger.infox(rspamd_config, 'registered raw search trie from %1 patterns', raw_count)
    end

    local mime_count = #mime_patterns
    if mime_count > 0 then
      mime_trie = rspamd_trie.create(mime_patterns)
      rspamd_logger.infox(rspamd_config, 'registered mime search trie from %1 patterns', mime_count)
    end

    local body_count = #body_patterns
    if body_count > 0 then
      body_trie = rspamd_trie.create(body_patterns)
      rspamd_logger.infox(rspamd_config, 'registered body search trie from %1 patterns', body_count)
    end

    if not (mime_trie or raw_trie or body_trie) then
      rspamd_logger.infox(rspamd_config, 'no tries defined')
    else
      -- one callback symbol performs all searches
      local id = rspamd_config:register_symbol({
        name = 'TRIE_CALLBACK',
        type = 'callback',
        callback = tries_callback
      })

      -- -1 is treated as "no parent registered": skip the virtual symbols
      if id ~= -1 then
        for sym in pairs(opts) do
          rspamd_config:register_symbol({
            name = sym,
            type = 'virtual',
            parent = id
          })
        end
      end
    end
  end