You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

html.lua 4.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. -- Licensed to the Apache Software Foundation (ASF) under one or more
  2. -- contributor license agreements. See the NOTICE file distributed with
  3. -- this work for additional information regarding copyright ownership.
  4. -- The ASF licenses this file to you under the Apache License, Version 2.0
  5. -- (the "License"); you may not use this file except in compliance with
  6. -- the License. You may obtain a copy of the License at:
  7. --
  8. -- http://www.apache.org/licenses/LICENSE-2.0
  9. --
  10. -- Unless required by applicable law or agreed to in writing, software
  11. -- distributed under the License is distributed on an "AS IS" BASIS,
  12. -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. -- See the License for the specific language governing permissions and
  14. -- limitations under the License.
  -- Table of regexp-style rules taken from the global `config`; expression
  -- strings assigned into it below are keyed by symbol name.
  local reconf = config.regexp
  -- NOTE(review): neither module below is referenced in the visible part of
  -- this file; kept because other code may rely on them.
  local rspamd_regexp = require("rspamd_regexp")
  local rspamd_logger = require("rspamd_logger")

  -- Messages that have only an HTML part
  reconf.MIME_HTML_ONLY = 'has_only_html_part()'
  --- Look for a sizeable image wrapped in a link inside an HTML text part
  -- whose length falls in the half-open range [min, max).
  -- @param task object exposing `get_text_parts()`
  -- @param min inclusive lower bound for the part length
  -- @param max exclusive upper bound for the part length
  -- @return `true` as soon as a matching image is found; nothing otherwise
  local function check_html_image(task, min, max)
    for _, part in ipairs(task:get_text_parts()) do
      if part:is_html() then
        local html = part:get_html()
        local length = part:get_length()
        -- Images are only fetched when the part has an HTML context and its
        -- length is inside the requested window.
        local in_range = html and length >= min and length < max
        local images = in_range and html:get_images()

        if images then
          for _, img in ipairs(images) do
            local tag = img.tag
            local parent = tag and tag:get_parent()
            -- The image must sit directly inside an <a> element; small and
            -- unknown size images do not trigger.
            if parent and parent:get_type() == 'a'
                and img.height + img.width >= 210 then
              return true
            end
          end
        end
      end
    end
  end
  -- Shortest bucket: delegates to check_html_image with the length window
  -- 0 (inclusive) to 1024 (exclusive).
  rspamd_config.HTML_SHORT_LINK_IMG_1 = {
    description = 'Short html part (0..1K) with a link to an image',
    group = 'html',
    score = 3.0,
    callback = function(task)
      return check_html_image(task, 0, 1024)
    end,
  }
  -- Middle bucket: delegates to check_html_image with the length window
  -- 1024 (inclusive) to 1536 (exclusive).
  rspamd_config.HTML_SHORT_LINK_IMG_2 = {
    description = 'Short html part (1K..1.5K) with a link to an image',
    group = 'html',
    score = 1.0,
    callback = function(task)
      return check_html_image(task, 1024, 1536)
    end,
  }
  -- Longest bucket: delegates to check_html_image with the length window
  -- 1536 (inclusive) to 2048 (exclusive).
  rspamd_config.HTML_SHORT_LINK_IMG_3 = {
    description = 'Short html part (1.5K..2K) with a link to an image',
    group = 'html',
    score = 0.5,
    callback = function(task)
      return check_html_image(task, 1536, 2048)
    end,
  }
  -- Fires when an HTML part shorter than 50 (as reported by `get_length()`)
  -- carries a large image whose parent element is not a link.
  rspamd_config.R_EMPTY_IMAGE = {
    callback = function(task)
      for _, part in ipairs(task:get_text_parts()) do
        if part:is_html() then
          local html = part:get_html()    -- HTML context of the part
          local length = part:get_length() -- length of the part

          if html and length < 50 then
            local images = html:get_images()

            if images then
              for _, img in ipairs(images) do
                -- Only large images count: height + width of at least 400.
                if img.height + img.width >= 400 then
                  local tag = img.tag
                  local parent = tag and tag:get_parent()
                  -- An image with a known parent other than <a> triggers.
                  if parent and parent:get_type() ~= 'a' then
                    return true
                  end
                end
              end
            end
          end
        end
      end
    end,
    score = 2.0,
    group = 'html',
    description = 'Message contains empty parts and image'
  }
  -- Compares the amount of "text" estimated to live inside linked images with
  -- the real word count of a part. Returns `true` plus a second value
  -- `(rel - 0.5) * 2` when the image share `rel` exceeds one half, and `false`
  -- when no part qualifies.
  rspamd_config.R_SUSPICIOUS_IMAGES = {
    callback = function(task)
      for _, part in ipairs(task:get_text_parts()) do
        local html = part:get_html()

        if html then
          local words = part:get_words_count()
          local images = html:get_images()
          local pic_words = 0

          if images then
            for _, img in ipairs(images) do
              local dim = img.width + img.height
              local tag = img.tag
              local parent = tag and tag:get_parent()

              -- Only images placed directly inside an <a> element count, and
              -- only when they are neither small nor large.
              if parent and parent:get_type() == 'a'
                  and dim > 100 and dim < 3000 then
                -- We assume that a single picture 100x200 contains
                -- approximately 3 words of text.
                pic_words = pic_words + dim / 100
              end
            end
          end

          -- Guard against division by zero for parts with no words and no
          -- counted images.
          local total = words + pic_words
          if total > 0 then
            local rel = pic_words / total
            if rel > 0.5 then
              return true, (rel - 0.5) * 2
            end
          end
        end
      end

      return false
    end,
    score = 5.0,
    group = 'html',
    -- Fixed wording: the rule inspects images, not messages.
    description = 'Message contains many suspicious images'
  }