You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

sa_trivial_convert.lua 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. local fun = require "fun"
  2. local rspamd_logger = require "rspamd_logger"
  3. local util = require "rspamd_util"
  4. local lua_util = require "lua_util"
  5. local rspamd_regexp = require "rspamd_regexp"
  6. local ucl = require "ucl"
  -- Shared converter state:
  --   complicated: SA config lines that could not be converted, in input order
  --   rules:       parsed rules grouped by rule type
  --   scores:      numeric score per symbol, taken from `score` lines
  local complicated, rules, scores = {}, {}, {}
  --- Join the tail of a token list back into a single space-separated string.
  -- @param words sequence of tokens from one configuration line
  -- @param start number of leading tokens to skip
  -- @return the remaining tokens joined with a single space ("" when none are left)
  local function words_to_re(words, start)
    local first_kept = start + 1
    return table.concat(words, " ", first_kept)
  end
  --- Collect every match of a Lua pattern found in a string.
  -- @param str string to scan
  -- @param delim Lua pattern describing one token; defaults to runs of
  --   non-whitespace characters
  -- @return sequence of matched tokens, in order of appearance
  local function split(str, delim)
    local pattern = delim or '[^%s]+'
    local pieces = {}
    for piece in string.gmatch(str, pattern) do
      pieces[#pieces + 1] = piece
    end
    return pieces
  end
  --- Build a function that extracts one field from every parsed mail address.
  -- @param field key to read from each parsed address element ('addr' or 'name')
  -- @return function(str) returning a sequence of the values found
  local function make_address_extractor(field)
    return function(str)
      local addr_parsed = util.parse_mail_address(str)
      local ret = {}
      if addr_parsed then
        for _, elt in ipairs(addr_parsed) do
          if elt[field] then
            ret[#ret + 1] = elt[field]
          end
        end
      end
      return ret
    end
  end

  --- Parse the header part of a `header` rule (e.g. `From:addr|To`).
  -- Fills in two fields of cur_rule:
  --   cur_rule.header   - sequence of per-header parameter tables
  --                       ({header, strong, raw, function})
  --   cur_rule.ordinary - true only when the rule names exactly one plain
  --                       header with no modifiers
  -- @param hline header specification, `|`-separated, with `:`-separated modifiers
  -- @param cur_rule rule table being built; modified in place
  local function handle_header_def(hline, cur_rule)
    local hdr_params = {}
    -- Check if an re is an ordinary re
    local ordinary = true

    for _, h in ipairs(split(hline, '[^|]+')) do
      if h == 'ALL' or h == 'ALL:raw' then
        ordinary = false
      else
        local args = split(h, '[^:]+')
        -- A fresh table per header: reusing a single table would make every
        -- entry of hdr_params alias the same (last) header definition.
        local cur_param = {
          strong = false,
          raw = false,
          header = args[1],
        }

        if args[2] then
          -- We have some ops that are required for the header, so it's not ordinary
          ordinary = false
        end

        for i = 2, #args do
          local func = args[i]
          if func == 'addr' or func == 'name' then
            cur_param['function'] = make_address_extractor(func)
          elseif func == 'raw' then
            cur_param['raw'] = true
          elseif func == 'case' then
            cur_param['strong'] = true
          else
            rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
              func, cur_rule['symbol'])
          end
        end

        -- Some header rules require splitting to check of multiple headers
        local name = cur_param['header']
        if name == 'MESSAGEID' or name == 'ToCc' then
          -- Special case for spamassassin
          ordinary = false
        else
          hdr_params[#hdr_params + 1] = cur_param
        end
      end
    end

    -- Set the result once, after all headers are seen, so that an empty
    -- header list still yields a well-defined (non-ordinary) rule.
    cur_rule['ordinary'] = ordinary and #hdr_params == 1
    cur_rule['header'] = hdr_params
  end
  --- Parse one SpamAssassin configuration file.
  -- Rules that can be expressed as simple regexp maps are stored in `rules`
  -- (grouped by type, and for header rules additionally by header name),
  -- `score` lines are stored in `scores`, and every line that cannot be
  -- converted is appended to `complicated`.
  -- NOTE(review): `score` lines are consumed here even when the rule they
  -- refer to ends up in `complicated` - confirm that is intended.
  -- @param f open file handle supporting `f:lines()`; not closed here
  local function process_sa_conf(f)
    local cur_rule = {}
    local valid_rule = false
    local skip_to_endif = false
    local if_nested = 0

    -- Remember a line that has to stay in the SpamAssassin configuration
    local function keep(line)
      complicated[#complicated + 1] = line
    end

    -- Store the pending rule in `rules`; the pending state is always cleared,
    -- including when the rule turns out to be malformed.
    local function insert_cur_rule()
      local rule = cur_rule
      cur_rule = {}
      valid_rule = false

      if not rule.type or not rule.symbol then
        rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', rule)
        return
      end

      local hdr_name
      if rule.type == 'header' then
        hdr_name = rule.header and rule.header[1] and rule.header[1].header
        if not hdr_name then
          rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', rule)
          return
        end
      end

      local target = rules[rule.type]
      if not target then
        target = {}
        rules[rule.type] = target
      end
      if hdr_name then
        -- Header rules are grouped one level deeper, by header name
        if not target[hdr_name] then
          target[hdr_name] = {}
        end
        target = target[hdr_name]
      end
      target[rule.symbol] = rule
    end

    -- Flush the pending rule (if any) and start from a clean rule table
    local function start_rule()
      if valid_rule then
        insert_cur_rule()
      end
      cur_rule = {}
      valid_rule = false
    end

    -- Extract the numeric score from the tokens of a `score` line
    local function parse_score(words)
      local raw
      if #words == 3 then
        -- score rule <x>
        raw = words[3]
      elseif #words == 6 then
        -- score rule <x1> <x2> <x3> <x4>
        -- we assume here that bayes and network are enabled and select <x4>
        raw = words[6]
      end
      local value = raw and tonumber(raw)
      if not value then
        rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
        return 0
      end
      return value
    end

    -- Track if/else/endif nesting. Returns true when the line was consumed
    -- (it is either a directive or lies inside a skipped conditional block).
    local function handle_conditional(l)
      -- Unbalanced if/endif
      if if_nested < 0 then
        if_nested = 0
      end

      local is_if = string.match(l, '^if')
      local is_else = string.match(l, '^else')
      local is_endif = string.match(l, '^endif')

      if skip_to_endif then
        if is_endif then
          if_nested = if_nested - 1
          if if_nested == 0 then
            skip_to_endif = false
          end
        elseif is_if then
          if_nested = if_nested + 1
        elseif is_else and if_nested <= 1 then
          -- Else counterpart for the outermost if; an else that belongs to a
          -- nested if must not end the skipped region
          skip_to_endif = false
        end
        keep(l)
        return true
      end

      if is_if then
        -- ifplugin, if !plugin(...) and any other if are all treated alike
        skip_to_endif = true
        if_nested = if_nested + 1
      elseif is_else then
        -- Else counterpart for if
        skip_to_endif = true
      elseif is_endif then
        if_nested = if_nested - 1
      else
        return false
      end

      -- The directive is recorded exactly once and not parsed any further
      keep(l)
      return true
    end

    -- header SYMBOL Header =~ /regexp/
    local function parse_header_rule(words, l)
      start_rule()
      if words[4] ~= '=~' and words[4] ~= '!~' then
        keep(l)
        return
      end

      cur_rule['type'] = 'header'
      cur_rule['symbol'] = words[2]
      if words[4] == '!~' then
        -- Negated matches are not converted
        keep(l)
        return
      end

      cur_rule['re_expr'] = words_to_re(words, 4)
      if string.find(cur_rule['re_expr'], '%s+%[if%-unset:') then
        keep(l)
        return
      end

      cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
      if not cur_rule['re'] then
        rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
          cur_rule['re_expr'], cur_rule['symbol'])
        keep(l)
        return
      end

      handle_header_def(words[3], cur_rule)
      if not cur_rule['ordinary'] then
        keep(l)
        return
      end
      valid_rule = true
    end

    -- body|rawbody|full SYMBOL /regexp/
    local function parse_content_rule(words, l, rule_type, raw)
      start_rule()
      cur_rule['symbol'] = words[2]

      local first = words[3] and string.sub(words[3], 1, 1)
      if first ~= '/' and first ~= 'm' then
        -- might be function
        keep(l)
        return
      end

      cur_rule['type'] = rule_type
      cur_rule['re_expr'] = words_to_re(words, 2)
      cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
      if raw then
        cur_rule['raw'] = true
      end

      if cur_rule['re'] then
        valid_rule = true
      else
        -- Keep the line instead of silently dropping the rule
        rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
          cur_rule['re_expr'], cur_rule['symbol'])
        keep(l)
      end
    end

    -- uri SYMBOL /regexp/
    local function parse_uri_rule(words, l)
      start_rule()
      cur_rule['type'] = 'uri'
      cur_rule['symbol'] = words[2]
      cur_rule['re_expr'] = words_to_re(words, 2)
      cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
      if cur_rule['re'] and cur_rule['symbol'] then
        valid_rule = true
      else
        keep(l)
      end
    end

    local function process_line(l)
      l = lua_util.rspamd_str_trim(l)
      -- Replace bla=~/re/ with bla =~ /re/ (#2372)
      l = l:gsub('([^%s])%s*([=!]~)%s*([^%s])', '%1 %2 %3')

      if string.len(l) == 0 or string.sub(l, 1, 1) == '#' then
        return
      end

      if handle_conditional(l) then
        return
      end

      -- Skip comments: everything from the first token starting with '#'
      local words = {}
      for _, w in ipairs(split(l)) do
        if string.sub(w, 1, 1) == '#' then
          break
        end
        words[#words + 1] = w
      end

      local kind = words[1]
      if kind == "header" then
        parse_header_rule(words, l)
      elseif kind == "body" then
        parse_content_rule(words, l, 'sabody', false)
      elseif kind == "rawbody" then
        parse_content_rule(words, l, 'sarawbody', false)
      elseif kind == "full" then
        parse_content_rule(words, l, 'message', true)
      elseif kind == "uri" then
        parse_uri_rule(words, l)
      elseif kind == "meta" then
        -- meta SYMBOL expression
        start_rule()
        keep(l)
      elseif kind == "describe" and valid_rule then
        cur_rule['description'] = words_to_re(words, 2)
      elseif kind == "score" then
        if words[2] then
          scores[words[2]] = parse_score(words)
        else
          -- A bare `score` has no symbol to index by
          rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
          keep(l)
        end
      else
        keep(l)
      end
    end

    for l in f:lines() do
      process_line(l)
    end

    if valid_rule then
      insert_cur_rule()
    end
  end
  -- Parse every file named on the command line; unreadable files are
  -- reported and skipped.
  for _, matched in ipairs(arg) do
    local f = io.open(matched, "r")
    if f then
      rspamd_logger.messagex(rspamd_config, 'loading SA rules from %s', matched)
      process_sa_conf(f)
      -- Release the handle as soon as the file has been parsed
      f:close()
    else
      rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
    end
  end
  -- Generated multimap rule definitions, keyed by lowercased map name
  local multimap_conf = {}

  --- Write one regexp map file and register the matching multimap rule.
  -- @param what rule type: 'sabody', 'sarawbody', 'full' (or 'message', the
  --   tag recorded for `full` rules by the parser), 'uri' or 'header'
  -- @param syms table mapping symbol name to its parsed rule (reads `rule.re`)
  -- @param hdr header name; only used when what == 'header'
  local function handle_rule(what, syms, hdr)
    local mtype, filter, fname, header
    local sym = what:upper()

    if what == 'sabody' then
      mtype = 'content'
      fname = 'body_re.map'
      filter = 'oneline'
    elseif what == 'sarawbody' then
      fname = 'raw_body_re.map'
      mtype = 'content'
      filter = 'rawtext'
    elseif what == 'full' or what == 'message' then
      -- `full` rules arrive tagged as 'message'; accept both names and
      -- normalise the map name so they are not rejected as unknown
      sym = 'FULL'
      fname = 'full_re.map'
      mtype = 'content'
      filter = 'full'
    elseif what == 'uri' then
      fname = 'uri_re.map'
      mtype = 'url'
      filter = 'full'
    elseif what == 'header' then
      fname = ('hdr_' .. hdr .. '_re.map'):lower()
      mtype = 'header'
      header = hdr
      sym = sym .. '_' .. hdr:upper()
    else
      rspamd_logger.errx('unknown type: %s', what)
      return
    end

    local conf = {
      type = mtype,
      filter = filter,
      symbol = 'SA_MAP_AUTO_' .. sym,
      regexp = true,
      map = fname,
      header = header,
      symbols = {}
    }

    -- Sort the symbols so repeated runs produce identical files
    local names = {}
    for k in pairs(syms) do
      names[#names + 1] = k
    end
    table.sort(names)

    -- Fail with the OS error message rather than a nil-index error
    local re_file = assert(io.open(fname, 'w'))
    for _, k in ipairs(names) do
      local score = scores[k] or 0.0
      re_file:write(string.format('/%s/ %s:%f\n', tostring(syms[k].re), k, score))
      conf.symbols[#conf.symbols + 1] = k
    end
    re_file:close()

    multimap_conf[sym:lower()] = conf
    rspamd_logger.messagex('stored %s regexp in %s', sym:lower(), fname)
  end
  -- Emit one map per rule type; header rules get one map per header name.
  for rule_type, bucket in pairs(rules) do
    if rule_type ~= 'header' then
      handle_rule(rule_type, bucket)
    else
      for hdr_name, hdr_rules in pairs(bucket) do
        handle_rule(rule_type, hdr_rules, hdr_name)
      end
    end
  end
  -- Serialise the generated multimap rules.
  local out = ucl.to_format(multimap_conf, 'ucl')
  -- assert() turns an open failure into an error carrying the OS message
  -- instead of a later nil-index error
  local mmap_conf = assert(io.open('auto_multimap.conf', 'w'))
  mmap_conf:write(out)
  mmap_conf:close()
  rspamd_logger.messagex('stored multimap conf in %s', 'auto_multimap.conf')

  -- Write every line that could not be converted, skipping whitespace-only lines.
  local sa_remain = assert(io.open('auto_sa.conf', 'w'))
  for _, l in ipairs(complicated) do
    if not string.match(l, '^%s+$') then
      sa_remain:write(l)
      sa_remain:write('\n')
    end
  end
  sa_remain:close()
  rspamd_logger.messagex('stored sa remains conf in %s', 'auto_sa.conf')