You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

html.lua 4.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. -- Licensed to the Apache Software Foundation (ASF) under one or more
  2. -- contributor license agreements. See the NOTICE file distributed with
  3. -- this work for additional information regarding copyright ownership.
  4. -- The ASF licenses this file to you under the Apache License, Version 2.0
  5. -- (the "License"); you may not use this file except in compliance with
  6. -- the License. You may obtain a copy of the License at:
  7. --
  8. -- http://www.apache.org/licenses/LICENSE-2.0
  9. --
  10. -- Unless required by applicable law or agreed to in writing, software
  11. -- distributed under the License is distributed on an "AS IS" BASIS,
  12. -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. -- See the License for the specific language governing permissions and
  14. -- limitations under the License.
  -- Table of regexp-style rules, taken from the global `config` table.
  local reconf = config['regexp']
  local rspamd_regexp = require("rspamd_regexp")
  local rspamd_logger = require("rspamd_logger")

  -- Rule for messages that consist of an HTML part only.
  reconf.MIME_HTML_ONLY = 'has_only_html_part()'
  --- Check whether a message has an HTML part within a given length range
  -- that contains at least one image flagged as `embedded`.
  -- @param task task object; must provide `get_text_parts()`
  -- @param min inclusive lower bound for the part length (as reported by
  --   the part's `get_length()`)
  -- @param max exclusive upper bound for the part length
  -- @return `true` when a matching part with an embedded image is found,
  --   `false` otherwise
  local function check_html_image(task, min, max)
    local tp = task:get_text_parts()
    if not tp then
      -- nothing to inspect
      return false
    end

    for _, p in ipairs(tp) do
      if p:is_html() then
        local hc = p:get_html()
        local len = p:get_length()

        -- Skip the part when no HTML context is available instead of
        -- raising on a nil index.
        if hc and len >= min and len < max then
          local images = hc:get_images()
          if images then
            for _, i in ipairs(images) do
              if i['embedded'] then
                return true
              end
            end
          end
        end
      end
    end

    return false
  end
  -- Symbol for the shortest HTML size bucket: delegates to
  -- check_html_image with the length range [0, 1024).
  rspamd_config.HTML_SHORT_LINK_IMG_1 = {
    group = 'html',
    score = 3.0,
    description = 'Short html part (0..1K) with a link to an image',
    callback = function(task)
      local lower, upper = 0, 1024
      return check_html_image(task, lower, upper)
    end,
  }
  -- Symbol for the middle HTML size bucket: delegates to
  -- check_html_image with the length range [1024, 1536).
  rspamd_config.HTML_SHORT_LINK_IMG_2 = {
    group = 'html',
    score = 1.0,
    description = 'Short html part (1K..1.5K) with a link to an image',
    callback = function(task)
      local lower, upper = 1024, 1536
      return check_html_image(task, lower, upper)
    end,
  }
  -- Symbol for the largest HTML size bucket: delegates to
  -- check_html_image with the length range [1536, 2048).
  rspamd_config.HTML_SHORT_LINK_IMG_3 = {
    group = 'html',
    score = 0.5,
    description = 'Short html part (1.5K..2K) with a link to an image',
    callback = function(task)
      local lower, upper = 1536, 2048
      return check_html_image(task, lower, upper)
    end,
  }
  -- Symbol that fires when an HTML part with a length below 50 contains an
  -- embedded image whose height + width is at least 400.
  rspamd_config.R_EMPTY_IMAGE = {
    callback = function(task)
      local tp = task:get_text_parts() -- get text parts in a message
      if not tp then
        return false
      end

      for _, p in ipairs(tp) do -- iterate over text parts array using `ipairs`
        if p:is_html() then -- if the current part is html part
          local hc = p:get_html() -- we get HTML context
          local len = p:get_length() -- and part's length

          -- Only parts shorter than 50 are of interest; also skip parts
          -- without an HTML context instead of raising on a nil index.
          if hc and len < 50 then
            local images = hc:get_images() -- then we check for HTML images

            if images then -- if there are images
              for _, i in ipairs(images) do -- then iterate over images in the part
                -- a large embedded image next to (almost) no text
                if i['embedded'] and i['height'] + i['width'] >= 400 then
                  return true -- add symbol
                end
              end
            end
          end
        end
      end

      return false
    end,
    score = 2.0,
    group = 'html',
    description = 'Message contains empty parts and image'
  }
  -- Symbol that compares the amount of embedded image content with the
  -- amount of text in each part that has an HTML context. Image size is
  -- converted to an estimated word count; the symbol fires when that
  -- estimate is more than half of the combined total.
  rspamd_config.R_SUSPICIOUS_IMAGES = {
    -- Returns `true` plus a weight of (rel - 0.5) * 2 for the first part
    -- whose image share `rel` exceeds 0.5; returns `false` otherwise.
    callback = function(task)
      local tp = task:get_text_parts() -- get text parts in a message

      for _, p in ipairs(tp) do
        local h = p:get_html()

        if h then
          local l = p:get_words_count()
          local img = h:get_images()
          local pic_words = 0

          if img then
            for _, i in ipairs(img) do
              if i['embedded'] then
                local dim = i['width'] + i['height']

                -- do not trigger on small and large images
                if dim > 100 and dim < 3000 then
                  -- We assume that a single picture 100x200 contains approx 3 words of text
                  pic_words = pic_words + dim / 100
                end
              end
            end
          end

          -- guard against division by zero when there is neither text nor
          -- countable images
          if l + pic_words > 0 then
            local rel = pic_words / (l + pic_words)

            if rel > 0.5 then
              return true, (rel - 0.5) * 2
            end
          end
        end
      end

      return false
    end,
    score = 5.0,
    group = 'html',
    -- The rule measures images, so the description says so.
    description = 'Message contains many suspicious images'
  }