You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

pyzor.lua 6.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. --[[
  2. Copyright (c) 2021, defkev <defkev@gmail.com>
  3. Copyright (c) 2018, Carsten Rosenberg <c.rosenberg@heinlein-support.de>
  4. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  5. Licensed under the Apache License, Version 2.0 (the "License");
  6. you may not use this file except in compliance with the License.
  7. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ]]--
  15. --[[[
  16. -- @module pyzor
  17. -- This module contains pyzor access functions
  18. --]]
  19. local lua_util = require "lua_util"
  20. local tcp = require "rspamd_tcp"
  21. local upstream_list = require "rspamd_upstream_list"
  22. local rspamd_logger = require "rspamd_logger"
  23. local common = require "lua_scanners/common"
  -- Short module name: reused below for the debug alias, the default key prefix
  -- and the default log prefix, and exported as the scanner's `name`.
  local N = "pyzor"
  -- Category tags exported as the scanner's `type`.
  local categories = { "pyzor", "bulk", "hash", "scanner" }
  --- Build the runtime settings for a pyzor rule.
  --
  -- User options are layered over the built-in defaults, the key prefix and
  -- log prefix are derived when not supplied, and the configured servers are
  -- turned into an upstream list.
  --
  -- @param opts user-supplied options merged over the defaults
  -- @return the completed settings table, or nil when `servers` is missing or
  --         cannot be turned into an upstream list (an error is logged)
  local function pyzor_config(opts)
    local defaults = {
      text_part_min_words = 2,
      default_port = 5953,
      timeout = 15.0,
      log_clean = false,
      retransmits = 2,
      detection_category = "hash",
      -- cache lifetime: 7200 seconds, i.e. two hours
      cache_expire = 7200,
      message = '${SCANNER}: Pyzor bulk message found: "${VIRUS}"',
      default_score = 1.5,
      action = false,
    }

    local conf = lua_util.override_defaults(defaults, opts)

    -- Derived values are only filled in when the user did not provide them.
    conf.prefix = conf.prefix or ('rext_' .. N .. '_')
    conf.log_prefix = conf.log_prefix
        or (N .. ' (' .. conf.detection_category .. ')')

    local servers = conf.servers
    if not servers then
      rspamd_logger.errx(rspamd_config, 'no servers defined')
      return nil
    end

    local upstreams = upstream_list.create(rspamd_config, servers,
        conf.default_port)
    conf.upstreams = upstreams

    if not upstreams then
      rspamd_logger.errx(rspamd_config, 'cannot parse servers %s', servers)
      return nil
    end

    lua_util.add_debug_alias('external_services', N)
    return conf
  end
  --- Look up a message on a pyzor server and record the verdict.
  --
  -- `common.condition_check_and_continue` is given `pyzor_check_uncached` as a
  -- continuation; when it returns a truthy value nothing more is done here,
  -- otherwise the uncached check is started directly.
  --
  -- @param task    task object: used for logging, for `insert_result` and as
  --                the owner of the TCP request
  -- @param content payload written verbatim to the pyzor server
  -- @param digest  digest string; compared with the SHA-1 of empty input and
  --                handed to the cache helper
  -- @param rule    rule settings; fields read here: upstreams, retransmits,
  --                timeout, log_prefix, log_clean, default_score, symbol_fail
  local function pyzor_check(task, content, digest, rule)
    local function pyzor_check_uncached()
      -- da39a3ee... is the SHA-1 of empty input: there is nothing to look up,
      -- so bail out before an upstream is even selected.
      if digest == 'da39a3ee5e6b4b0d3255bfef95601890afd80709' then
        rspamd_logger.infox(task, '%s: not checking default digest', rule.log_prefix)
        return
      end

      local upstream = rule.upstreams:get_upstream_round_robin()
      local addr = upstream:get_addr()
      local retransmits = rule.retransmits

      -- Forward declaration: send_request and the callback refer to each other.
      local pyzor_callback

      -- Send `content` to the currently selected upstream.
      local function send_request()
        tcp.request({
          task = task,
          host = addr:to_string(),
          port = addr:get_port(),
          upstream = upstream,
          timeout = rule.timeout,
          shutdown = true,
          data = content,
          callback = pyzor_callback,
        })
      end

      pyzor_callback = function(err, data)
        if err then
          -- retry with another upstream until the retransmit budget is spent
          if retransmits > 0 then
            retransmits = retransmits - 1
            -- Select a different upstream!
            upstream = rule.upstreams:get_upstream_round_robin()
            addr = upstream:get_addr()
            lua_util.debugm(N, task, '%s: retry IP: %s:%s err: %s',
                rule.log_prefix, addr, addr:get_port(), err)
            send_request()
          else
            rspamd_logger.errx(task, '%s: failed to scan, maximum retransmits exceed',
                rule.log_prefix)
            task:insert_result(rule['symbol_fail'], 0.0,
                'failed to scan and retransmits exceed')
          end
          return
        end

        -- pyzor output is unicode (\x09 -> tab, \0a -> newline)
        -- public.pyzor.org:24441 (200, 'OK') 21285091 206759
        -- server:port            Code Diag   Count    WL-Count
        local str_data = tostring(data)
        lua_util.debugm(N, task, '%s: returned data: %s',
            rule.log_prefix, str_data)

        -- If pyzor would return JSON this wouldn't be necessary
        local resp = {}
        for field in string.gmatch(str_data, '[^\t]+') do
          resp[#resp + 1] = field
        end

        if resp[2] ~= [[(200, 'OK')]] then
          rspamd_logger.errx(task, "error parsing response: %s", str_data)
          return
        end

        local reported = tonumber(resp[3])
        local whitelisted = tonumber(resp[4])
        if not reported or not whitelisted then
          -- Truncated or non-numeric counters: refuse to do arithmetic on nil.
          rspamd_logger.errx(task, "error parsing response: %s", str_data)
          return
        end

        --[[
        weight = rule.default_score * (Count - WL-Count) / (Count + WL-Count),
        rounded to two decimals, e.g. with rule.default_score = 1:
          SPAM: Count = 100, WL-Count = 1  -> 0.98
          HAM:  Count = 10,  WL-Count = 10 -> 0
        A digest pyzor has never seen comes back as 0/0; dividing would give
        NaN, so that case is pinned to a weight of 0 (ham).
        ]]
        local total = reported + whitelisted
        local weight = 0
        if total ~= 0 then
          weight = tonumber(string.format("%.2f",
              rule.default_score * (reported - whitelisted) / total))
        end

        local info = string.format("count=%d wl=%d", reported, whitelisted)
        local threat_string = string.format("bl_%d_wl_%d",
            reported, whitelisted)

        if weight > 0 then
          lua_util.debugm(N, task, '%s: returned result is spam - info: %s',
              rule.log_prefix, info)
          common.yield_result(task, rule, threat_string, weight)
          common.save_cache(task, digest, rule, threat_string, weight)
        else
          if rule.log_clean then
            rspamd_logger.infox(task, '%s: clean, returned result is ham - info: %s',
                rule.log_prefix, info)
          else
            lua_util.debugm(N, task, '%s: returned result is ham - info: %s',
                rule.log_prefix, info)
          end
          common.save_cache(task, digest, rule, 'OK', weight)
        end
      end

      send_request()
    end

    if not common.condition_check_and_continue(task, content, rule, digest,
        pyzor_check_uncached) then
      pyzor_check_uncached()
    end
  end
  -- Scanner descriptor returned as the value of this module: identity and
  -- category tags plus the configuration and check entry points.
  local exports = {
    name = N,
    type = categories,
    description = 'pyzor bulk scanner',
    configure = pyzor_config,
    check = pyzor_check,
  }

  return exports