You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

pyzor.lua 6.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. --[[
  2. Copyright (c) 2021, defkev <defkev@gmail.com>
  3. Copyright (c) 2018, Carsten Rosenberg <c.rosenberg@heinlein-support.de>
  4. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  5. Licensed under the Apache License, Version 2.0 (the "License");
  6. you may not use this file except in compliance with the License.
  7. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ]]--
  15. --[[[
  16. -- @module pyzor
  17. -- This module contains pyzor access functions
  18. --]]
  19. local lua_util = require "lua_util"
  20. local tcp = require "rspamd_tcp"
  21. local upstream_list = require "rspamd_upstream_list"
  22. local rspamd_logger = require "rspamd_logger"
  23. local common = require "lua_scanners/common"
  -- Scanner name; reused for the key prefix, log prefix and debug alias.
  local N = "pyzor"
  -- Category tags exported as the scanner `type`.
  local categories = {
    "pyzor",
    "bulk",
    "hash",
    "scanner",
  }
  --- Build the pyzor rule configuration.
  -- The defaults below are combined with `opts` through
  -- lua_util.override_defaults; afterwards the key prefix, the log prefix and
  -- the upstream list are filled in.
  -- @param opts user supplied options table
  -- @return the rule table, or nil when `servers` is missing or no upstream
  --         list could be created from it
  local function pyzor_config(opts)
    local defaults = {
      text_part_min_words = 2,
      default_port = 5953,
      timeout = 15.0,
      log_clean = false,
      retransmits = 2,
      detection_category = "hash",
      cache_expire = 7200, -- redis cache expiry (two hours if the unit is seconds)
      message = '${SCANNER}: Pyzor bulk message found: "${VIRUS}"',
      default_score = 1.5,
      action = false,
      min_threshold = 0, -- zero score if whitelist+report count smaller than this
      halfscore_threshold = 0, -- half score if whitelist+report count smaller than this
    }
    local conf = lua_util.override_defaults(defaults, opts)

    -- Derive the prefixes only when the user did not provide them.
    conf.prefix = conf.prefix or ('rext_' .. N .. '_')
    conf.log_prefix = conf.log_prefix
        or (N .. ' (' .. conf.detection_category .. ')')

    if not conf.servers then
      rspamd_logger.errx(rspamd_config, 'no servers defined')
      return nil
    end

    local upstreams = upstream_list.create(rspamd_config, conf.servers,
        conf.default_port)
    conf.upstreams = upstreams

    if not upstreams then
      rspamd_logger.errx(rspamd_config, 'cannot parse servers %s', conf.servers)
      return nil
    end

    lua_util.add_debug_alias('external_services', N)
    return conf
  end
  --- Convert pyzor counters into a symbol weight.
  -- The weight is `rule.default_score` scaled by (reported - whitelisted) /
  -- (reported + whitelisted), rounded to two decimals. For example, with
  -- default_score 1: count=100, wl=1 gives ~0.98 and count=10, wl=10 gives 0.
  -- @param rule rule table (reads default_score, min_threshold, halfscore_threshold)
  -- @param reported number of reports returned by pyzor
  -- @param whitelisted number of whitelist entries returned by pyzor
  -- @return the weight; 0 when the total is not above min_threshold or is not
  --         positive, halved when the total is below halfscore_threshold
  local function pyzor_weight(rule, reported, whitelisted)
    local total = reported + whitelisted
    if not (total > rule.min_threshold and total > 0) then
      return 0
    end

    local weight = tonumber(string.format("%.2f",
        rule.default_score * (reported - whitelisted) / total))
    if total < rule.halfscore_threshold then
      weight = tonumber(string.format("%.2f", weight / 2))
    end
    return weight
  end

  --- Check a message against pyzor.
  -- @param task rspamd task
  -- @param content payload written to the pyzor server over TCP
  -- @param digest digest of the content, used for the cache and to skip
  --        empty content
  -- @param rule rule table produced by pyzor_config
  local function pyzor_check(task, content, digest, rule)
    local function pyzor_check_uncached()
      -- SHA-1 of the empty string: there is nothing to look up, so bail out
      -- before an upstream is selected.
      if digest == 'da39a3ee5e6b4b0d3255bfef95601890afd80709' then
        rspamd_logger.infox(task, '%s: not checking default digest', rule.log_prefix)
        return
      end

      local upstream = rule.upstreams:get_upstream_round_robin()
      if not upstream then
        -- Without this guard the get_addr() call below would raise.
        rspamd_logger.errx(task, '%s: no upstream available', rule.log_prefix)
        return
      end
      local addr = upstream:get_addr()
      local retransmits = rule.retransmits

      -- Forward declaration: send_request and the callback refer to each other.
      local pyzor_callback

      -- Send `content` to the currently selected upstream.
      local function send_request()
        tcp.request({
          task = task,
          host = addr:to_string(),
          port = addr:get_port(),
          upstream = upstream,
          timeout = rule.timeout,
          shutdown = true,
          data = content,
          callback = pyzor_callback,
        })
      end

      pyzor_callback = function(err, data)
        if err then
          -- retry with another upstream until retransmits exceeds
          if retransmits > 0 then
            retransmits = retransmits - 1
            local next_upstream = rule.upstreams:get_upstream_round_robin()
            if next_upstream then
              upstream = next_upstream
              addr = upstream:get_addr()
              lua_util.debugm(N, task, '%s: retry IP: %s:%s err: %s',
                  rule.log_prefix, addr, addr:get_port(), err)
              send_request()
              return
            end
            rspamd_logger.errx(task, '%s: no upstream available for retry',
                rule.log_prefix)
          end

          rspamd_logger.errx(task, '%s: failed to scan, maximum retransmits exceed',
              rule.log_prefix)
          task:insert_result(rule['symbol_fail'], 0.0,
              'failed to scan and retransmits exceed')
          return
        end

        -- pyzor output is tab separated (\x09 -> tab, \x0a -> newline), e.g.
        -- public.pyzor.org:24441  (200, 'OK')  21285091  206759
        -- server:port             Code Diag    Count     WL-Count
        local str_data = tostring(data)
        lua_util.debugm(N, task, '%s: returned data: %s',
            rule.log_prefix, str_data)

        -- If pyzor would return JSON this wouldn't be necessary
        local resp = {}
        for field in string.gmatch(str_data, '[^\t]+') do
          resp[#resp + 1] = field
        end

        if resp[2] ~= [[(200, 'OK')]] then
          rspamd_logger.errx(task, "error parsing response: %s", str_data)
          return
        end

        local reported = tonumber(resp[3])
        local whitelisted = tonumber(resp[4])
        if not reported or not whitelisted then
          -- A truncated or non-numeric reply would otherwise raise on the
          -- arithmetic below.
          rspamd_logger.errx(task, "error parsing response: %s", str_data)
          return
        end

        local weight = pyzor_weight(rule, reported, whitelisted)
        local info = string.format("count=%d wl=%d", reported, whitelisted)
        local threat_string = string.format("bl_%d_wl_%d",
            reported, whitelisted)

        if weight > 0 then
          lua_util.debugm(N, task, '%s: returned result is spam - info: %s',
              rule.log_prefix, info)
          common.yield_result(task, rule, threat_string, weight)
          common.save_cache(task, digest, rule, threat_string, weight)
          return
        end

        if rule.log_clean then
          rspamd_logger.infox(task, '%s: clean, returned result is ham - info: %s',
              rule.log_prefix, info)
        else
          lua_util.debugm(N, task, '%s: returned result is ham - info: %s',
              rule.log_prefix, info)
        end
        common.save_cache(task, digest, rule, 'OK', weight)
      end

      send_request()
    end

    -- A truthy result means common.condition_check_and_continue has taken over
    -- (presumably cache handling that calls pyzor_check_uncached itself when
    -- needed - confirm in lua_scanners/common); otherwise scan right away.
    if not common.condition_check_and_continue(task, content, rule, digest,
        pyzor_check_uncached) then
      pyzor_check_uncached()
    end
  end
  -- Module export: the scanner descriptor table.
  return {
    name = N,
    description = 'pyzor bulk scanner',
    type = categories,
    -- Entry points: build the rule table, then check a message with it.
    configure = pyzor_config,
    check = pyzor_check,
  }