Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. -- Licensed to the Apache Software Foundation (ASF) under one or more
  2. -- contributor license agreements. See the NOTICE file distributed with
  3. -- this work for additional information regarding copyright ownership.
  4. -- The ASF licenses this file to you under the Apache License, Version 2.0
  5. -- (the "License"); you may not use this file except in compliance with
  6. -- the License. You may obtain a copy of the License at:
  7. --
  8. -- http://www.apache.org/licenses/LICENSE-2.0
  9. --
  10. -- Unless required by applicable law or agreed to in writing, software
  11. -- distributed under the License is distributed on an "AS IS" BASIS,
  12. -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. -- See the License for the specific language governing permissions and
  14. -- limitations under the License.
  15. local reconf = config['regexp']
  16. local rspamd_regexp = require "rspamd_regexp"
  -- Regexp rule for messages that have only an HTML part; the rule
  -- expression is the `has_only_html_part()` call.
  reconf.MIME_HTML_ONLY = {
    description = 'Messages that have only HTML part',
    group = 'headers',
    score = 0.2,
    re = 'has_only_html_part()',
  }
  -- Report whether any ancestor of `tag` is an `<a>` element.
  -- The tag itself is never inspected, only the chain obtained through
  -- successive `get_parent()` calls.
  -- Returns true on the first ancestor whose `get_type()` is 'a', and false
  -- once the chain is exhausted.
  local function has_anchor_parent(tag)
    local ancestor = tag:get_parent()
    while ancestor do
      if ancestor:get_type() == 'a' then
        return true
      end
      ancestor = ancestor:get_parent()
    end
    return false
  end
  -- Look for an embedded, reasonably sized image wrapped in a link inside an
  -- HTML part whose length lies in the half-open range [min, max).
  --
  -- task: object providing `get_text_parts()`
  -- min:  inclusive lower bound compared against the part's `get_length()`
  -- max:  exclusive upper bound compared against the part's `get_length()`
  --
  -- Returns true on the first matching image; otherwise falls off the end and
  -- returns nothing (nil).
  local function check_html_image(task, min, max)
    for _, part in ipairs(task:get_text_parts()) do
      if part:is_html() then
        local html = part:get_html()
        local size = part:get_length()

        local images
        if html and size >= min and size < max then
          images = html:get_images()
        end

        for _, img in ipairs(images or {}) do
          local tag = img.tag
          -- do not trigger on small and unknown size images
          if tag and has_anchor_parent(tag)
              and img.height + img.width >= 210 and img.embedded then
            return true
          end
        end
      end
    end
  end
  -- Shortest bucket: HTML part length in [0, 1024) with a linked image, as
  -- decided by check_html_image.
  rspamd_config.HTML_SHORT_LINK_IMG_1 = {
    description = 'Short html part (0..1K) with a link to an image',
    group = 'html',
    score = 2.0,
    callback = function(task)
      return check_html_image(task, 0, 1024)
    end,
  }
  -- Middle bucket: HTML part length in [1024, 1536) with a linked image, as
  -- decided by check_html_image.
  rspamd_config.HTML_SHORT_LINK_IMG_2 = {
    description = 'Short html part (1K..1.5K) with a link to an image',
    group = 'html',
    score = 1.0,
    callback = function(task)
      return check_html_image(task, 1024, 1536)
    end,
  }
  -- Longest bucket: HTML part length in [1536, 2048) with a linked image, as
  -- decided by check_html_image.
  rspamd_config.HTML_SHORT_LINK_IMG_3 = {
    description = 'Short html part (1.5K..2K) with a link to an image',
    group = 'html',
    score = 0.5,
    callback = function(task)
      return check_html_image(task, 1536, 2048)
    end,
  }
  -- Fires when an HTML part with a length below 50 carries a large image
  -- (height + width >= 400) that is NOT wrapped in a link.
  -- The callback returns true on the first such image and nothing otherwise.
  rspamd_config.R_EMPTY_IMAGE = {
    description = 'Message contains empty parts and image',
    group = 'html',
    score = 2.0,
    callback = function(task)
      for _, part in ipairs(task:get_text_parts()) do
        if part:is_html() then
          local html = part:get_html()
          local size = part:get_length()

          -- Only nearly empty parts are interesting
          local images
          if html and size < 50 then
            images = html:get_images()
          end

          for _, img in ipairs(images or {}) do
            -- Large image first, then make sure it is not inside an anchor
            if img.height + img.width >= 400 then
              local tag = img.tag
              if tag and not has_anchor_parent(tag) then
                return true
              end
            end
          end
        end
      end
    end,
  }
  -- Compares the amount of real text in a part with the amount of "text" that
  -- linked images could plausibly carry.
  --
  -- For every text part that has an HTML context:
  --   * each image that has a tag with an anchor ancestor, and whose
  --     width + height lies strictly between 100 and 3000, contributes
  --     (width + height) / 100 estimated "picture words";
  --   * rel = pic_words / (words + pic_words) is computed when the
  --     denominator is positive.
  -- The callback returns `true, (rel - 0.5) * 2` for the first part where
  -- rel > 0.5, and false when no part qualifies.
  rspamd_config.R_SUSPICIOUS_IMAGES = {
    callback = function(task)
      local tp = task:get_text_parts() -- get text parts in a message

      for _, p in ipairs(tp) do
        local h = p:get_html()

        if h then
          local l = p:get_words_count()
          local img = h:get_images()
          local pic_words = 0

          if img then
            for _, i in ipairs(img) do
              local dim = i['width'] + i['height']
              local tag = i['tag']

              if tag then
                if has_anchor_parent(tag) then
                  if dim > 100 and dim < 3000 then
                    -- We assume that a single picture 100x200 contains approx 3 words of text
                    pic_words = pic_words + dim / 100
                  end
                end
              end
            end
          end

          -- Guard against division by zero when there are neither words nor images
          if l + pic_words > 0 then
            local rel = pic_words / (l + pic_words)

            if rel > 0.5 then
              -- Scale the excess over 0.5 into the (0, 1] range
              return true, (rel - 0.5) * 2
            end
          end
        end
      end

      return false
    end,
    score = 5.0,
    group = 'html',
    -- The rule is about images; the previous text said "suspicious messages"
    description = 'Message contains many suspicious images'
  }
  -- Callback symbol that inspects the style of 'font', 'span', 'div', 'p' and
  -- 'td' tags in every HTML part and inserts up to three results:
  --   * R_WHITE_ON_WHITE     - share of content inside transparent leaf blocks
  --   * MANY_INVISIBLE_PARTS - number of invisible/transparent leaf blocks
  --   * ZERO_FONT            - number of leaf blocks with a zero font size
  -- The id returned by register_symbol is kept in `vis_check_id` so that other
  -- symbols can be registered with it as their parent.
  local vis_check_id = rspamd_config:register_symbol{
    name = 'HTML_VISIBLE_CHECKS',
    type = 'callback',
    group = 'html',
    callback = function(task)
      local tp = task:get_text_parts() -- get text parts in a message
      local ret = false
      local transp_rate = 0
      local invisible_blocks = 0
      local zero_size_blocks = 0
      local arg
      local normal_len = 0
      local transp_len = 0

      for _, p in ipairs(tp) do -- iterate over text parts array using `ipairs`
        normal_len = normal_len + p:get_length()

        -- HTML context of the part, or a falsy value for non-HTML parts
        local hc = p:is_html() and p:get_html()

        if hc then
          hc:foreach_tag({'font', 'span', 'div', 'p', 'td'}, function(tag, clen, is_leaf)
            local bl = tag:get_style()

            if bl then
              if not bl.visible and clen > 0 and is_leaf then
                invisible_blocks = invisible_blocks + 1
              end

              -- A missing font size is treated as a non-zero default
              if (bl.font_size or 12) == 0 and clen > 0 and is_leaf then
                zero_size_blocks = zero_size_blocks + 1
              end

              if bl.transparent and is_leaf then
                ret = true
                invisible_blocks = invisible_blocks + 1 -- This block is invisible
                -- Move this block's content length from "normal" to "transparent"
                transp_len = transp_len + clen
                normal_len = normal_len - clen
                local tr = transp_len / (normal_len + transp_len)

                if tr > transp_rate then
                  transp_rate = tr
                  -- Fall back to black without modifying the style table
                  local color = bl.color or {0, 0, 0}
                  local bgcolor = bl.bgcolor or {0, 0, 0}
                  -- Zero-pad every component so the result is an unambiguous
                  -- six-digit hex colour
                  arg = string.format('%s color #%02x%02x%02x bgcolor #%02x%02x%02x',
                    tag:get_type(),
                    color[1], color[2], color[3],
                    bgcolor[1], bgcolor[2], bgcolor[3])
                end
              end
            end

            return false -- Continue search
          end)
        end
      end

      if ret then
        -- Recompute the rate over all parts now that every length is known
        transp_rate = transp_len / (normal_len + transp_len)

        if transp_rate > 0.1 then
          -- NOTE(review): the NaN test (x ~= x) cannot be true here because
          -- NaN > 0.1 is false; kept to preserve the existing behaviour.
          if transp_rate > 0.5 or transp_rate ~= transp_rate then
            transp_rate = 0.5
          end

          task:insert_result('R_WHITE_ON_WHITE', (transp_rate * 2.0), arg)
        end
      end

      if invisible_blocks > 0 then
        -- Clamp so that the count can index the rates table
        if invisible_blocks > 10 then
          invisible_blocks = 10
        end

        local rates = { -- From 1 to 10
          0.05,
          0.1,
          0.2,
          0.3,
          0.4,
          0.5,
          0.6,
          0.7,
          0.8,
          1.0,
        }

        task:insert_result('MANY_INVISIBLE_PARTS', rates[invisible_blocks],
          tostring(invisible_blocks))
      end

      if zero_size_blocks > 0 then
        if zero_size_blocks > 5 then
          if zero_size_blocks > 10 then
            -- Full score
            task:insert_result('ZERO_FONT', 1.0,
              tostring(zero_size_blocks))
          else
            -- 6..10 blocks share the top entry of the rates table below
            zero_size_blocks = 5
          end
        end

        if zero_size_blocks <= 5 then
          local rates = { -- From 1 to 5
            0.1,
            0.2,
            0.2,
            0.3,
            0.5,
          }

          task:insert_result('ZERO_FONT', rates[zero_size_blocks],
            tostring(zero_size_blocks))
        end
      end
    end,
  }
  -- Virtual symbol registered under vis_check_id; the parent callback inserts
  -- its result by name.
  rspamd_config:register_symbol({
    name = 'R_WHITE_ON_WHITE',
    parent = vis_check_id,
    type = 'virtual',
    group = 'html',
    score = 4.0,
    one_shot = true,
    description = 'Message contains low contrast text',
  })
  -- Virtual symbol registered under vis_check_id; the parent callback inserts
  -- its result by name.
  rspamd_config:register_symbol({
    name = 'ZERO_FONT',
    parent = vis_check_id,
    type = 'virtual',
    group = 'html',
    score = 1.0, -- Reached if more than 5 elements have zero size
    one_shot = true,
    description = 'Zero sized font used',
  })
  -- Virtual symbol registered under vis_check_id; the parent callback inserts
  -- its result by name.
  rspamd_config:register_symbol({
    name = 'MANY_INVISIBLE_PARTS',
    parent = vis_check_id,
    type = 'virtual',
    group = 'html',
    score = 1.0, -- Reached if more than 10 elements are hidden
    one_shot = true,
    description = 'Many parts are visually hidden',
  })
  -- Fires when a `<link>` tag in an HTML part carries an extra value whose
  -- string form ends in ".css", optionally followed by a query or fragment.
  rspamd_config.EXT_CSS = {
    description = 'Message contains external CSS reference',
    group = 'html',
    score = 1.0,
    callback = function(task)
      local regexp_lib = require "rspamd_regexp"
      local css_re = regexp_lib.create_cached('/^.*\\.css(?:[?#].*)?$/i')
      local parts = task:get_text_parts()
      local matched = false

      -- Tag visitor: records a match and hands the flag back to foreach_tag,
      -- so it returns true from the first match onwards.
      local function visit_link(tag)
        local extra = tag:get_extra()
        if extra then
          local target = tostring(extra)
          if target and css_re:match(target) then
            matched = true
          end
        end
        return matched
      end

      for _, part in ipairs(parts) do
        if part:is_html() and part:get_html() then
          local html = part:get_html()
          html:foreach_tag({'link'}, visit_link)
        end
      end

      return matched
    end,
  }
  -- Matches anchor content that starts with "https:" (case-insensitive)
  local https_re = rspamd_regexp.create_cached('/^https:/i')

  -- Fires when an anchor's content starts with "https:" while the URL attached
  -- to the tag uses the plain 'http' protocol.
  -- Only the first HTML part of the message is examined. On a match the
  -- callback returns `true, 1.0, <host of the target URL>`, otherwise false.
  rspamd_config.HTTP_TO_HTTPS = {
    description = 'Anchor text contains different scheme to target URL',
    group = 'html',
    score = 2.0,
    callback = function(task)
      local found_opts
      local parts = task:get_text_parts() or {}

      -- Locate the first HTML part; later parts are never inspected
      local first_html
      for _, part in ipairs(parts) do
        if part:is_html() then
          first_html = part
          break
        end
      end
      if not first_html then
        return false
      end

      local html = first_html:get_html()
      if not html then
        return false
      end

      local found = false
      html:foreach_tag('a', function(tag, _)
        -- Skip this loop if we already have a match
        if found then
          return true
        end

        local text = tag:get_content()
        if not text or not https_re:match(text) then
          return false
        end

        local url = tag:get_extra()
        if not url or url:get_protocol() ~= 'http' then
          return false
        end

        -- Capture matches for http in href to https in visible part only
        found = true
        found_opts = url:get_host()
        return true
      end)

      if found then
        return true, 1.0, found_opts
      end
      return false
    end,
  }
  -- Fires when an anchor in the first HTML part has an extra value whose
  -- lower-cased string form starts with "http://" or "https://" followed by
  -- four dot-separated groups of digits.
  -- The callback always returns a boolean: true on the first such anchor,
  -- false when there are no text parts, no HTML part, no HTML context or no
  -- matching anchor.
  rspamd_config.HTTP_TO_IP = {
    callback = function(task)
      local tp = task:get_text_parts()
      if (not tp) then return false end

      for _,p in ipairs(tp) do
        if p:is_html() then
          local hc = p:get_html()
          if (not hc) then return false end

          local found = false

          hc:foreach_tag('a', function (tag)
            local u = tag:get_extra()

            if u and tostring(u):lower():match('^https?://%d+%.%d+%.%d+%.%d+') then
              found = true
              -- Returning true stops the iteration, so no further anchors
              -- are visited after the first hit
              return true
            end

            return false
          end)

          -- Only the first HTML part is examined
          return found
        end
      end

      -- No HTML part at all
      return false
    end,
    description = 'Anchor points to an IP address',
    score = 1.0,
    group = 'html'
  }