-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements.  See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to you under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License.  You may obtain a copy of the License at:
--
--     http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

-- Regexp rules table taken from the global `config`
local reconf = config.regexp

-- Regexp rule built on the `has_only_html_part()` expression
reconf.MIME_HTML_ONLY = {
  description = 'Messages that have only HTML part',
  group = 'headers',
  score = 0.2,
  re = 'has_only_html_part()',
}

--- Look for an image wrapped into a link inside an HTML part of bounded length.
-- Only HTML parts whose length lies in the half-open range [min, max) are
-- examined.
-- @param task object providing `get_text_parts()`
-- @param min inclusive lower bound for the part length
-- @param max exclusive upper bound for the part length
-- @return true when a matching image is found; nothing otherwise
local function check_html_image(task, min, max)
  local parts = task:get_text_parts()

  for _, part in ipairs(parts) do
    if part:is_html() then
      local html = part:get_html()
      local part_len = part:get_length()

      if html and part_len >= min and part_len < max then
        for _, img in ipairs(html:get_images() or {}) do
          local tag = img.tag
          local parent = tag and tag:get_parent()

          -- The image must be a direct child of an anchor; images that are
          -- both embedded and small (height + width < 210) are skipped
          if parent and parent:get_type() == 'a'
              and (img.height + img.width >= 210 or not img.embedded) then
            return true
          end
        end
      end
    end
  end
end

-- HTML_SHORT_LINK_IMG_{1,2,3}: a short HTML part holding a linked image.
-- The three symbols differ only in the part length range and the score.
do
  -- Builds a symbol definition whose callback runs `check_html_image`
  -- for HTML parts with length in [min_len, max_len)
  local function short_link_img(min_len, max_len, score, description)
    return {
      callback = function(task)
        return check_html_image(task, min_len, max_len)
      end,
      score = score,
      group = 'html',
      description = description,
    }
  end

  rspamd_config.HTML_SHORT_LINK_IMG_1 = short_link_img(0, 1024, 2.0,
      'Short html part (0..1K) with a link to an image')

  rspamd_config.HTML_SHORT_LINK_IMG_2 = short_link_img(1024, 1536, 1.0,
      'Short html part (1K..1.5K) with a link to an image')

  rspamd_config.HTML_SHORT_LINK_IMG_3 = short_link_img(1536, 2048, 0.5,
      'Short html part (1.5K..2K) with a link to an image')
end
-- R_EMPTY_IMAGE: an HTML part with length below 50 that carries a large image
-- (height + width >= 400) whose parent tag is not an anchor.
rspamd_config.R_EMPTY_IMAGE = {
  description = 'Message contains empty parts and image',
  group = 'html',
  score = 2.0,

  callback = function(task)
    local parts = task:get_text_parts()

    for _, part in ipairs(parts) do
      if part:is_html() then
        local html = part:get_html()
        local part_len = part:get_length()

        -- Only nearly empty parts that have an HTML context are examined
        if html and part_len < 50 then
          for _, img in ipairs(html:get_images() or {}) do
            -- Small images are ignored
            if img.height + img.width >= 400 then
              local tag = img.tag
              local parent = tag and tag:get_parent()

              -- Linked images are left to other rules
              if parent and parent:get_type() ~= 'a' then
                return true
              end
            end
          end
        end
      end
    end
  end,
}

-- R_SUSPICIOUS_IMAGES: fires when linked images dominate the content of a
-- part. Each linked image of moderate size is converted into an estimated
-- number of "picture words" which is then compared with the real word count
-- of the part.
rspamd_config.R_SUSPICIOUS_IMAGES = {
  -- Returns `true, weight` for the first part where the share of picture
  -- words exceeds 0.5 (weight grows linearly from 0 to 1 as the share goes
  -- from 0.5 to 1), `false` otherwise.
  callback = function(task)
    local tp = task:get_text_parts() -- get text parts in a message

    for _, p in ipairs(tp) do
      local h = p:get_html()

      -- Parts without an HTML context are skipped
      if h then
        local l = p:get_words_count()
        local img = h:get_images()
        local pic_words = 0

        if img then
          for _, i in ipairs(img) do
            local dim = i['width'] + i['height']
            local tag = i['tag']

            if tag then
              local parent = tag:get_parent()
              if parent then
                -- Only images placed directly inside a link are counted
                if parent:get_type() == 'a' then
                  -- do not trigger on small and large images
                  if dim > 100 and dim < 3000 then
                    -- We assume that a single picture 100x200 contains approx 3 words of text
                    pic_words = pic_words + dim / 100
                  end
                end
              end
            end
          end
        end

        -- Guard against division by zero for parts with no words and no images
        if l + pic_words > 0 then
          local rel = pic_words / (l + pic_words)

          if rel > 0.5 then
            return true, (rel - 0.5) * 2
          end
        end
      end
    end

    return false
  end,

  score = 5.0,
  group = 'html',
  -- The rule is about images; the former text said "suspicious messages"
  description = 'Message contains many suspicious images'
}

-- HTML_VISIBLE_CHECKS: callback symbol that walks the styled blocks
-- (`font`, `span`, `div`, `p`, `td`) of every HTML part and reports up to
-- three results through `task:insert_result`:
--   * R_WHITE_ON_WHITE     - text colour is too close to the background colour
--   * MANY_INVISIBLE_PARTS - number of blocks that are hidden or low contrast
--   * ZERO_FONT            - number of blocks with a zero font size
-- The value returned by `register_symbol` is kept in `vis_check_id`.
local vis_check_id = rspamd_config:register_symbol{
  name = 'HTML_VISIBLE_CHECKS',
  type = 'callback',
  group = 'html',
  callback = function(task)
    local tp = task:get_text_parts() -- get text parts in a message
    local ret = false -- becomes true once a low contrast block is seen
    local diff = 0.0
    local transp_rate = 0
    local invisible_blocks = 0
    local zero_size_blocks = 0
    local arg -- option string describing the worst low contrast block

    -- Running totals: length of all parts minus low contrast content, and
    -- the weighted length of low contrast content
    local normal_len = 0
    local transp_len = 0

    for _,p in ipairs(tp) do -- iterate over text parts array using `ipairs`
      normal_len = normal_len + p:get_length()
      if p:is_html() and p:get_html() then -- if the current part is html part
        local hc = p:get_html() -- we get HTML context

        hc:foreach_tag({'font', 'span', 'div', 'p', 'td'}, function(tag)
          local bl = tag:get_extra()
          if bl then
            if not bl['visible'] then
              invisible_blocks = invisible_blocks + 1
            end

            if bl['font_size'] and bl['font_size'] == 0 then
              zero_size_blocks = zero_size_blocks + 1
            end

            if bl['bgcolor'] and bl['color'] and bl['visible'] then

              local color = bl['color']
              local bgcolor = bl['bgcolor']
              -- Should use visual approach here some day
              -- Weighted distance between the two colours; indices 1..3 are
              -- used as the red, green and blue components
              local diff_r = math.abs(color[1] - bgcolor[1])
              local diff_g = math.abs(color[2] - bgcolor[2])
              local diff_b = math.abs(color[3] - bgcolor[3])
              local r_avg = (color[1] + bgcolor[1]) / 2.0
              -- Square
              diff_r = diff_r * diff_r
              diff_g = diff_g * diff_g
              diff_b = diff_b * diff_b

              diff = math.sqrt(2*diff_r + 4*diff_g + 3 * diff_b +
                  (r_avg * (diff_r - diff_b) / 256.0))
              diff = diff / 256.0

              if diff < 0.1 then
                ret = true
                local content_len = #(tag:get_content() or {})
                invisible_blocks = invisible_blocks + 1 -- This block is invisible
                -- The closer the colours are, the more of the content length
                -- is accounted as transparent
                transp_len = transp_len + content_len * (0.1 - diff) * 10.0
                normal_len = normal_len - content_len
                local tr = transp_len / (normal_len + transp_len)
                if tr > transp_rate then
                  transp_rate = tr
                  -- Zero-pad every component to two hex digits: without the
                  -- padding (1, 2, 3) would be printed as the ambiguous #123
                  arg = string.format(
                    '%s color #%02x%02x%02x bgcolor #%02x%02x%02x',
                    tostring(tag:get_type()),
                    color[1], color[2], color[3],
                    bgcolor[1], bgcolor[2], bgcolor[3])
                end
              end
            end
          end

          return false -- Continue search
        end)

      end
    end

    if ret then
      transp_rate = transp_len / (normal_len + transp_len)

      if transp_rate > 0.1 then
        -- Cap the rate at 0.5, so the reported weight never exceeds 1.0.
        -- NOTE(review): a NaN rate never passes the `> 0.1` comparison above,
        -- so the `~=` self-comparison below cannot be true at this point
        if transp_rate > 0.5 or transp_rate ~= transp_rate then
          transp_rate = 0.5
        end

        task:insert_result('R_WHITE_ON_WHITE', (transp_rate * 2.0), arg)
      end
    end

    if invisible_blocks > 0 then
      -- The weight table has 10 entries, so the counter is clamped to 10
      if invisible_blocks > 10 then
        invisible_blocks = 10
      end
      local rates = { -- From 1 to 10
        0.05,
        0.1,
        0.2,
        0.3,
        0.4,
        0.5,
        0.6,
        0.7,
        0.8,
        1.0,
      }
      task:insert_result('MANY_INVISIBLE_PARTS', rates[invisible_blocks],
          tostring(invisible_blocks))
    end

    if zero_size_blocks > 0 then
      if zero_size_blocks > 5 then
        if zero_size_blocks > 10 then
          -- Full score
          task:insert_result('ZERO_FONT', 1.0,
              tostring(zero_size_blocks))
        else
          -- 6..10 blocks are reported with the weight (and count) of 5
          zero_size_blocks = 5
        end
      end

      -- Not entered for more than 10 blocks: that case is reported above
      if zero_size_blocks <= 5 then
        local rates = { -- From 1 to 5
          0.1,
          0.2,
          0.2,
          0.3,
          0.5,
        }
        task:insert_result('ZERO_FONT', rates[zero_size_blocks],
            tostring(zero_size_blocks))
      end
    end
  end,
}

-- Virtual symbols reported by the HTML_VISIBLE_CHECKS callback; all of them
-- share the same parent, group and `one_shot` flag.
for _, sym in ipairs({
  {
    name = 'R_WHITE_ON_WHITE',
    description = 'Message contains low contrast text',
    score = 4.0,
  },
  {
    name = 'ZERO_FONT',
    description = 'Zero sized font used',
    -- the callback reports weight 1.0 only for more than 10 zero sized blocks
    score = 1.0,
  },
  {
    name = 'MANY_INVISIBLE_PARTS',
    description = 'Many parts are visually hidden',
    -- the callback reports weight 1.0 for 10 or more hidden blocks
    score = 1.0,
  },
}) do
  rspamd_config:register_symbol{
    type = 'virtual',
    parent = vis_check_id,
    name = sym.name,
    description = sym.description,
    score = sym.score,
    group = 'html',
    one_shot = true,
  }
end

-- EXT_CSS: fires when the extra data of a `link` tag, converted to a string,
-- ends in `.css` (optionally followed by a query string or a fragment).
rspamd_config.EXT_CSS = {
  description = 'Message contains external CSS reference',
  group = 'html',
  score = 1.0,

  callback = function(task)
    local rspamd_regexp = require "rspamd_regexp"
    local css_re = rspamd_regexp.create_cached('/^.*\\.css(?:[?#].*)?$/i')
    local parts = task:get_text_parts()
    local found = false

    -- Tag visitor: remembers a match and returns the current state, so
    -- `false` is returned until the first match
    local function visit_link(tag)
      local extra = tag:get_extra()
      if extra then
        local target = tostring(extra)
        if target and css_re:match(target) then
          found = true
        end
      end

      return found
    end

    for _, part in ipairs(parts) do
      -- Only HTML parts that have an HTML context are inspected
      if part:is_html() and part:get_html() then
        part:get_html():foreach_tag({'link'}, visit_link)
      end
    end

    return found
  end,
}

-- HTTP_TO_HTTPS: the visible text of an anchor starts with one of the
-- `http:` / `https:` schemes while its target uses the other one.
rspamd_config.HTTP_TO_HTTPS = {
  description = 'Anchor text contains different scheme to target URL',
  group = 'html',
  score = 2.0,

  callback = function(task)
    local parts = task:get_text_parts()
    if not parts then
      return false
    end

    for _, part in ipairs(parts) do
      if part:is_html() then
        -- The verdict is taken from the first HTML part only
        local html = part:get_html()
        if not html then
          return false
        end

        local mismatch = false
        html:foreach_tag('a', function(tag)
          -- Nothing more to do once a mismatch is known
          if mismatch then
            return true
          end

          local text = tag:get_content()
          if not text then
            return false
          end
          text = tostring(text):lower()
          if not text:match('^http') then
            return false
          end

          local target = tag:get_extra()
          if not target then
            return false
          end
          target = tostring(target):lower()
          if not target:match('^http') then
            return false
          end

          if (text:match('^http:') and target:match('^https:')) or
              (text:match('^https:') and target:match('^http:')) then
            mismatch = true
          end

          return mismatch
        end)

        return mismatch
      end
    end

    return false
  end,
}

-- Tells whether a lower-cased URL string has a dotted-quad IPv4 address as its
-- host. The address must be followed by the end of the string or by one of
-- `:`, `/`, `?`, `#`, so that host names which merely start with digits
-- (e.g. `1.2.3.4.example.com`) are not reported. Lua patterns have no
-- alternation, hence the two separate matches.
local function http_url_targets_ipv4(url)
  return url:match('^https?://%d+%.%d+%.%d+%.%d+$') ~= nil
      or url:match('^https?://%d+%.%d+%.%d+%.%d+[:/?#]') ~= nil
end

-- HTTP_TO_IP: an anchor of the first HTML part points to an `http(s)` URL
-- whose host is an IPv4 address.
rspamd_config.HTTP_TO_IP = {
  -- Returns true when such an anchor is found and false in all other cases
  callback = function(task)
    local tp = task:get_text_parts()
    if (not tp) then return false end
    for _,p in ipairs(tp) do
      if p:is_html() then
        -- The verdict is taken from the first HTML part only
        local hc = p:get_html()
        if (not hc) then return false end
        local found = false
        hc:foreach_tag('a', function (tag)
          -- Skip the remaining tags if we already have a match
          if (found) then return true end
          local u = tag:get_extra()
          if (u) then
            if http_url_targets_ipv4(tostring(u):lower()) then
              found = true
            end
          end
          -- false continues the search (same convention as the other
          -- callbacks in this file); true is returned once a match is known
          return found
        end)
        return found
      end
    end
    -- No HTML part at all
    return false
  end,
  description = 'Anchor points to an IP address',
  score = 1.0,
  group = 'html'
}