--[[
Copyright (c) 2015, Vsevolod Stakhov <vsevolod@highsecure.ru>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--

if confighelp then
  return
end

-- This plugin is intended to read and parse spamassassin rules with regexp
-- rules. SA plugins or statistics are not supported

local E = {}
local N = 'spamassassin'

local rspamd_logger = require "rspamd_logger"
local rspamd_regexp = require "rspamd_regexp"
local rspamd_expression = require "rspamd_expression"
local rspamd_trie = require "rspamd_trie"
local util = require "rspamd_util"
local lua_util = require "lua_util"
local fun = require "fun"

-- Known plugins
local known_plugins = {
  'Mail::SpamAssassin::Plugin::FreeMail',
  'Mail::SpamAssassin::Plugin::HeaderEval',
  'Mail::SpamAssassin::Plugin::ReplaceTags',
  'Mail::SpamAssassin::Plugin::RelayEval',
  'Mail::SpamAssassin::Plugin::MIMEEval',
  'Mail::SpamAssassin::Plugin::BodyEval',
  'Mail::SpamAssassin::Plugin::MIMEHeader',
  'Mail::SpamAssassin::Plugin::WLBLEval',
  'Mail::SpamAssassin::Plugin::HTMLEval',
}

-- Table that replaces SA symbol with rspamd equivalent
-- Used for dependency resolution
local symbols_replacements = {
  -- SPF replacements
  USER_IN_SPF_WHITELIST = 'WHITELIST_SPF',
  USER_IN_DEF_SPF_WL = 'WHITELIST_SPF',
  SPF_PASS = 'R_SPF_ALLOW',
  SPF_FAIL = 'R_SPF_FAIL',
  SPF_SOFTFAIL = 'R_SPF_SOFTFAIL',
  SPF_HELO_PASS = 'R_SPF_ALLOW',
  SPF_HELLO_FAIL = 'R_SPF_FAIL',
  SPF_HELLO_SOFTFAIL = 'R_SPF_SOFTFAIL',
  -- DKIM replacements
  USER_IN_DKIM_WHITELIST = 'WHITELIST_DKIM',
  USER_IN_DEF_DKIM_WL = 'WHITELIST_DKIM',
  DKIM_VALID = 'R_DKIM_ALLOW',
  -- SURBL replacements
  URIBL_SBL_A = 'URIBL_SBL',
  URIBL_DBL_SPAM = 'DBL_SPAM',
  URIBL_DBL_PHISH = 'DBL_PHISH',
  URIBL_DBL_MALWARE = 'DBL_MALWARE',
  URIBL_DBL_BOTNETCC = 'DBL_BOTNET',
  URIBL_DBL_ABUSE_SPAM = 'DBL_ABUSE',
  URIBL_DBL_ABUSE_REDIR = 'DBL_ABUSE_REDIR',
  URIBL_DBL_ABUSE_MALW = 'DBL_ABUSE_MALWARE',
  URIBL_DBL_ABUSE_BOTCC = 'DBL_ABUSE_BOTNET',
  URIBL_WS_SURBL = 'WS_SURBL_MULTI',
  URIBL_PH_SURBL = 'PH_SURBL_MULTI',
  URIBL_MW_SURBL = 'MW_SURBL_MULTI',
  URIBL_CR_SURBL = 'CRACKED_SURBL',
  URIBL_ABUSE_SURBL = 'ABUSE_SURBL',
  -- Misc rules
  BODY_URI_ONLY = 'R_EMPTY_IMAGE',
  HTML_IMAGE_ONLY_04 = 'HTML_SHORT_LINK_IMG_1',
  HTML_IMAGE_ONLY_08 = 'HTML_SHORT_LINK_IMG_1',
  HTML_IMAGE_ONLY_12 = 'HTML_SHORT_LINK_IMG_1',
  HTML_IMAGE_ONLY_16 = 'HTML_SHORT_LINK_IMG_2',
  HTML_IMAGE_ONLY_20 = 'HTML_SHORT_LINK_IMG_2',
  HTML_IMAGE_ONLY_24 = 'HTML_SHORT_LINK_IMG_3',
  HTML_IMAGE_ONLY_28 = 'HTML_SHORT_LINK_IMG_3',
  HTML_IMAGE_ONLY_32 = 'HTML_SHORT_LINK_IMG_3',
}

-- Internal variables
local rules = {}
local atoms = {}
local scores = {}
local scores_added = {}
local external_deps = {}
local freemail_domains = {}
local pcre_only_regexps = {}
local freemail_trie
local replace = {
  tags = {},
  pre = {},
  inter = {},
  post = {},
  rules = {},
}
local internal_regexp = {
  date_shift = rspamd_regexp.create("^\\(\\s*'((?:-?\\d+)|(?:undef))'\\s*,\\s*'((?:-?\\d+)|(?:undef))'\\s*\\)$")
}

-- Mail::SpamAssassin::Plugin::WLBLEval plugin
local sa_lists = {
  from_blacklist = {},
  from_whitelist = {},
  from_def_whitelist = {},
  to_blacklist = {},
  to_whitelist = {},
  elts = 0,
}

local func_cache = {}
local section = rspamd_config:get_all_opt("spamassassin")
if not (section and type(section) == 'table') then
  rspamd_logger.infox(rspamd_config, 'Module is unconfigured')
end

-- Minimum score to treat symbols as meta
local meta_score_alpha = 0.5

-- Maximum size of regexp checked
local match_limit = 0

local function split(str, delim)
  local result = {}

  if not delim then
    delim = '[^%s]+'
  end

  for token in string.gmatch(str, delim) do
    table.insert(result, token)
  end

  return result
end

local function replace_symbol(s)
  local rspamd_symbol = symbols_replacements[s]
  if not rspamd_symbol then
    return s, false
  end
  return rspamd_symbol, true
end

local ffi
if type(jit) == 'table' then
  ffi = require("ffi")
  ffi.cdef[[
    int rspamd_re_cache_type_from_string (const char *str);
    int rspamd_re_cache_process_ffi (void *ptask,
        void *pre,
        int type,
        const char *type_data,
        int is_strong);
]]
end

local function process_regexp_opt(re, task, re_type, header, strong)
  if type(jit) == 'table' then
    -- Use ffi call
    local itype = ffi.C.rspamd_re_cache_type_from_string(re_type)

    if not strong then
      strong = 0
    else
      strong = 1
    end
    local iret = ffi.C.rspamd_re_cache_process_ffi (task, re, itype, header, strong)

    return tonumber(iret)
  else
    return task:process_regexp(re, re_type, header, strong)
  end
end

local function is_pcre_only(name)
  if pcre_only_regexps[name] then
    return true
  end
  return false
end

local function handle_header_def(hline, cur_rule)
  --Now check for modifiers inside header's name
  local hdrs = split(hline, '[^|]+')
  local hdr_params = {}
  local cur_param = {}
  -- Check if an re is an ordinary re
  local ordinary = true

  for _,h in ipairs(hdrs) do
    if h == 'ALL' or h == 'ALL:raw' then
      ordinary = false
      cur_rule['type'] = 'function'
      -- Pack closure
      local re = cur_rule['re']
      -- Rule to match all headers
      rspamd_config:register_regexp({
        re = re,
        type = 'allheader',
        pcre_only = is_pcre_only(cur_rule['symbol']),
      })
      cur_rule['function'] = function(task)
        if not re then
          rspamd_logger.errx(task, 're is missing for rule %1', h)
          return 0
        end

        return process_regexp_opt(re, task, 'allheader')
      end
    else
      local args = split(h, '[^:]+')
      cur_param['strong'] = false
      cur_param['raw'] = false
      cur_param['header'] = args[1]

      if args[2] then
        -- We have some ops that are required for the header, so it's not ordinary
        ordinary = false
      end

      fun.each(function(func)
          if func == 'addr' then
            cur_param['function'] = function(str)
              local addr_parsed = util.parse_addr(str)
              local ret = {}
              if addr_parsed then
                for _,elt in ipairs(addr_parsed) do
                  if elt['addr'] then
                    table.insert(ret, elt['addr'])
                  end
                end
              end

              return ret
            end
          elseif func == 'name' then
            cur_param['function'] = function(str)
              local addr_parsed = util.parse_addr(str)
              local ret = {}
              if addr_parsed then
                for _,elt in ipairs(addr_parsed) do
                  if elt['name'] then
                    table.insert(ret, elt['name'])
                  end
                end
              end

              return ret
            end
          elseif func == 'raw' then
            cur_param['raw'] = true
          elseif func == 'case' then
            cur_param['strong'] = true
          else
            rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
              func, cur_rule['symbol'])
          end
        end, fun.tail(args))

        local function split_hdr_param(param, headers)
          for _,hh in ipairs(headers) do
            local nparam = {}
            for k,v in pairs(param) do
              if k ~= 'header' then
                nparam[k] = v
              end
            end

            nparam['header'] = hh
            table.insert(hdr_params, nparam)
          end
        end
        -- Some header rules require splitting to check of multiple headers
        if cur_param['header'] == 'MESSAGEID' then
          -- Special case for spamassassin
          ordinary = false
          split_hdr_param(cur_param, {
            'Message-ID',
            'X-Message-ID',
            'Resent-Message-ID'})
        elseif cur_param['header'] == 'ToCc' then
          ordinary = false
          split_hdr_param(cur_param, { 'To', 'Cc', 'Bcc' })
        else
          table.insert(hdr_params, cur_param)
        end
    end

    cur_rule['ordinary'] = ordinary
    cur_rule['header'] = hdr_params
  end
end


local function freemail_search(input)
  local res = 0
  local function trie_callback(number, pos)
    lua_util.debugm(N, rspamd_config, 'Matched pattern %1 at pos %2', freemail_domains[number], pos)
    res = res + 1
  end

  if input then
    freemail_trie:match(input, trie_callback, true)
  end

  return res
end

local function gen_eval_rule(arg)
  local eval_funcs = {
    {'check_freemail_from', function(task)
        local from = task:get_from('mime')
        if from and from[1] then
          return freemail_search(string.lower(from[1]['addr']))
        end
        return 0
      end},
    {'check_freemail_replyto',
      function(task)
        return freemail_search(task:get_header('Reply-To'))
      end
    },
    {'check_freemail_header',
      function(task, remain)
        -- Remain here contains one or two args: header and regexp to match
        local larg = string.match(remain, "^%(%s*['\"]([^%s]+)['\"]%s*%)$")
        local re = nil
        if not larg then
          larg, re = string.match(remain, "^%(%s*['\"]([^%s]+)['\"]%s*,%s*['\"]([^%s]+)['\"]%s*%)$")
        end

        if larg then
          local h
          if larg == 'EnvelopeFrom' then
            h = task:get_from('smtp')
            if h then h = h[1]['addr'] end
          else
            h = task:get_header(larg)
          end
          if h then
            local hdr_freemail = freemail_search(string.lower(h))

            if hdr_freemail > 0 and re then
              local r = rspamd_regexp.create_cached(re)
              if r then
                if r:match(h) then
                  return 1
                end
                return 0
              else
                rspamd_logger.infox(rspamd_config, 'cannot create regexp %1', re)
                return 0
              end
            end

            return hdr_freemail
          end
        end

        return 0
      end
    },
    {
      'check_for_missing_to_header',
      function (task)
        local th = task:get_recipients('mime')
        if not th or #th == 0 then
          return 1
        end

        return 0
      end
    },
    {
      'check_relays_unparseable',
      function(task)
        local rh_mime = task:get_header_full('Received')
        local rh_parsed = task:get_received_headers()

        local rh_cnt = 0
        if rh_mime then rh_cnt = #rh_mime end
        local parsed_cnt = 0
        if rh_parsed then parsed_cnt = #rh_parsed end

        return rh_cnt - parsed_cnt
      end
    },
    {
      'check_for_shifted_date',
      function (task, remain)
        -- Remain here contains two args: start and end hours shift
        local matches = internal_regexp['date_shift']:search(remain, true, true)
        if matches and matches[1] then
          local min_diff = matches[1][2]
          local max_diff = matches[1][3]

          if min_diff == 'undef' then
            min_diff = 0
          else
            min_diff = tonumber(min_diff) * 3600
          end
          if max_diff == 'undef' then
            max_diff = 0
          else
            max_diff = tonumber(max_diff) * 3600
          end

          -- Now get the difference between Date and message received date
          local dm = task:get_date { format = 'message', gmt = true}
          local dt = task:get_date { format = 'connect', gmt = true}
          local diff = dm - dt

          if (max_diff == 0 and diff >= min_diff) or
              (min_diff == 0 and diff <= max_diff) or
              (diff >= min_diff and diff <= max_diff) then
            return 1
          end
        end

        return 0
      end
    },
    {
      'check_for_mime',
      function(task, remain)
        local larg = string.match(remain, "^%(%s*['\"]([^%s]+)['\"]%s*%)$")

        if larg then
          if larg == 'mime_attachment' then
            local parts = task:get_parts()
            if parts then
              for _,p in ipairs(parts) do
                if p:get_filename() then
                  return 1
                end
              end
            end
          else
            rspamd_logger.infox(task, 'unimplemented mime check %1', arg)
          end
        end

        return 0
      end
    },
    {
      'check_from_in_blacklist',
      function(task)
        local from = task:get_from('mime')
        if ((from or E)[1] or E).addr then
          if sa_lists['from_blacklist'][string.lower(from[1]['addr'])] then
            return 1
          end
        end

        return 0
      end
    },
    {
      'check_from_in_whitelist',
      function(task)
        local from = task:get_from('mime')
        if ((from or E)[1] or E).addr then
          if sa_lists['from_whitelist'][string.lower(from[1]['addr'])] then
            return 1
          end
        end

        return 0
      end
    },
    {
      'check_from_in_default_whitelist',
      function(task)
        local from = task:get_from('mime')
        if ((from or E)[1] or E).addr then
          if sa_lists['from_def_whitelist'][string.lower(from[1]['addr'])] then
            return 1
          end
        end

        return 0
      end
    },
    {
      'check_to_in_blacklist',
      function(task)
        local rcpt = task:get_recipients('mime')
        if rcpt then
          for _,r in ipairs(rcpt) do
            if sa_lists['to_blacklist'][string.lower(r['addr'])] then
              return 1
            end
          end
        end

        return 0
      end
    },
    {
      'check_to_in_whitelist',
      function(task)
        local rcpt = task:get_recipients('mime')
        if rcpt then
          for _,r in ipairs(rcpt) do
            if sa_lists['to_whitelist'][string.lower(r['addr'])] then
              return 1
            end
          end
        end

        return 0
      end
    },
    {
      'html_tag_exists',
      function(task, remain)
        local tp = task:get_text_parts()

        for _,p in ipairs(tp) do
          if p:is_html() then
            local hc = p:get_html()

            if hc:has_tag(remain) then
              return 1
            end
          end
        end

        return 0
      end
    }
  }

  for _,f in ipairs(eval_funcs) do
    local pat = string.format('^%s', f[1])
    local first,last = string.find(arg, pat)

    if first then
      local func_arg = string.sub(arg, last + 1)
      return function(task)
        return f[2](task, func_arg)
      end
    end
  end
end

-- Returns parser function or nil
local function maybe_parse_sa_function(line)
  local arg
  local elts = split(line, '[^:]+')
  arg = elts[2]

  lua_util.debugm(N, rspamd_config, 'trying to parse SA function %1 with args %2',
    elts[1], elts[2])
  local substitutions = {
    {'^exists:',
      function(task) -- filter
        local hdrs_check
        if arg == 'MESSAGEID' then
          hdrs_check = {
            'Message-ID',
            'X-Message-ID',
            'Resent-Message-ID'
          }
        elseif arg == 'ToCc' then
          hdrs_check = { 'To', 'Cc', 'Bcc' }
        else
          hdrs_check = {arg}
        end

        for _,h in ipairs(hdrs_check) do
          if task:get_header(h) then
            return 1
          end
        end
        return 0
      end,
    },
    {'^eval:',
      function(task)
        local func = func_cache[arg]
        if not func then
          func = gen_eval_rule(arg)
          func_cache[arg] = func
        end

        if not func then
          rspamd_logger.errx(task, 'cannot find appropriate eval rule for function %1',
            arg)
        else
          return func(task)
        end

        return 0
      end
    },
  }

  for _,s in ipairs(substitutions) do
    if string.find(line, s[1]) then
      return s[2]
    end
  end

  return nil
end

local function words_to_re(words, start)
  return table.concat(fun.totable(fun.drop_n(start, words)), " ");
end

local function process_tflags(rule, flags)
  fun.each(function(flag)
    if flag == 'publish' then
      rule['publish'] = true
    elseif flag == 'multiple' then
      rule['multiple'] = true
    elseif string.match(flag, '^maxhits=(%d+)$') then
      rule['maxhits'] = tonumber(string.match(flag, '^maxhits=(%d+)$'))
    elseif flag == 'nice' then
      rule['nice'] = true
    end
  end, fun.drop_n(1, flags))

  if rule['re'] then
    if rule['maxhits'] then
      rule['re']:set_max_hits(rule['maxhits'])
    elseif rule['multiple'] then
      rule['re']:set_max_hits(0)
    else
      rule['re']:set_max_hits(1)
    end
  end
end

local function process_replace(words, tbl)
  local re = words_to_re(words, 2)
  tbl[words[2]] = re
end

local function process_sa_conf(f)
  local cur_rule = {}
  local valid_rule = false

  local function insert_cur_rule()
   if cur_rule['type'] ~= 'meta' and cur_rule['publish'] then
     -- Create meta rule from this rule
     local nsym = '__fake' .. cur_rule['symbol']
     local nrule = {
       type = 'meta',
       symbol = cur_rule['symbol'],
       score = cur_rule['score'],
       meta = nsym,
       description = cur_rule['description'],
     }
     rules[nrule['symbol']] = nrule
     cur_rule['symbol'] = nsym
   end
   -- We have previous rule valid
   if not cur_rule['symbol'] then
     rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', cur_rule)
   end
   rules[cur_rule['symbol']] = cur_rule
   cur_rule = {}
   valid_rule = false
  end

  local function parse_score(words)
    if #words == 3 then
      -- score rule <x>
      lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[3])
      return tonumber(words[3])
    elseif #words == 6 then
      -- score rule <x1> <x2> <x3> <x4>
      -- we assume here that bayes and network are enabled and select <x4>
      lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[6])
      return tonumber(words[6])
    else
      rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
    end

    return 0
  end

  local skip_to_endif = false
  local if_nested = 0
  for l in f:lines() do
    (function ()
    l = lua_util.rspamd_str_trim(l)
    -- Replace bla=~/re/ with bla =~ /re/ (#2372)
    l = l:gsub('([^%s])%s*([=!]~)%s*([^%s])', '%1 %2 %3')

    if string.len(l) == 0 or string.sub(l, 1, 1) == '#' then
      return
    end

    -- Unbalanced if/endif
    if if_nested < 0 then if_nested = 0 end
    if skip_to_endif then
      if string.match(l, '^endif') then
        if_nested = if_nested - 1

        if if_nested == 0 then
          skip_to_endif = false
        end
      elseif string.match(l, '^if') then
        if_nested = if_nested + 1
      elseif string.match(l, '^else') then
        -- Else counterpart for if
        skip_to_endif = false
      end
      return
    else
      if string.match(l, '^ifplugin') then
        local ls = split(l)

        if not fun.any(function(pl)
            if pl == ls[2] then return true end
            return false
            end, known_plugins) then
          skip_to_endif = true
        end
        if_nested = if_nested + 1
      elseif string.match(l, '^if !plugin%(') then
         local pname = string.match(l, '^if !plugin%(([A-Za-z:]+)%)')
         if fun.any(function(pl)
           if pl == pname then return true end
           return false
         end, known_plugins) then
           skip_to_endif = true
         end
         if_nested = if_nested + 1
      elseif string.match(l, '^if') then
        -- Unknown if
        skip_to_endif = true
        if_nested = if_nested + 1
      elseif string.match(l, '^else') then
        -- Else counterpart for if
        skip_to_endif = true
      elseif string.match(l, '^endif') then
        if_nested = if_nested - 1
      end
    end

    -- Skip comments
    local words = fun.totable(fun.take_while(
      function(w) return string.sub(w, 1, 1) ~= '#' end,
      fun.filter(function(w)
          return w ~= "" end,
      fun.iter(split(l)))))

    if words[1] == "header" or words[1] == 'mimeheader' then
      -- header SYMBOL Header ~= /regexp/
      if valid_rule then
        insert_cur_rule()
      end
      if words[4] and (words[4] == '=~' or words[4] == '!~') then
        cur_rule['type'] = 'header'
        cur_rule['symbol'] = words[2]

        if words[4] == '!~' then
          cur_rule['not'] = true
        end

        cur_rule['re_expr'] = words_to_re(words, 4)
        local unset_comp = string.find(cur_rule['re_expr'], '%s+%[if%-unset:')
        if unset_comp then
          -- We have optional part that needs to be processed
          local unset = string.match(string.sub(cur_rule['re_expr'], unset_comp),
            '%[if%-unset:%s*([^%]%s]+)]')
          cur_rule['unset'] = unset
          -- Cut it down
           cur_rule['re_expr'] = string.sub(cur_rule['re_expr'], 1, unset_comp - 1)
        end

        cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])

        if not cur_rule['re'] then
          rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
            cur_rule['re_expr'], cur_rule['symbol'])
        else
          cur_rule['re']:set_max_hits(1)
          handle_header_def(words[3], cur_rule)
        end

        if words[1] == 'mimeheader' then
          cur_rule['mime'] = true
        else
          cur_rule['mime'] = false
        end

        if cur_rule['re'] and cur_rule['symbol'] and
          (cur_rule['header'] or cur_rule['function']) then
          valid_rule = true
          cur_rule['re']:set_max_hits(1)
          if cur_rule['header'] and cur_rule['ordinary'] then
            for _,h in ipairs(cur_rule['header']) do
              if type(h) == 'string' then
                if cur_rule['mime'] then
                  rspamd_config:register_regexp({
                    re = cur_rule['re'],
                    type = 'mimeheader',
                    header = h,
                    pcre_only = is_pcre_only(cur_rule['symbol']),
                  })
                else
                  rspamd_config:register_regexp({
                    re = cur_rule['re'],
                    type = 'header',
                    header = h,
                    pcre_only = is_pcre_only(cur_rule['symbol']),
                  })
                end
              else
                h['mime'] = cur_rule['mime']
                if cur_rule['mime'] then
                  rspamd_config:register_regexp({
                    re = cur_rule['re'],
                    type = 'mimeheader',
                    header = h['header'],
                    pcre_only = is_pcre_only(cur_rule['symbol']),
                  })
                else
                  if h['raw'] then
                    rspamd_config:register_regexp({
                      re = cur_rule['re'],
                      type = 'rawheader',
                      header = h['header'],
                      pcre_only = is_pcre_only(cur_rule['symbol']),
                    })
                  else
                    rspamd_config:register_regexp({
                      re = cur_rule['re'],
                      type = 'header',
                      header = h['header'],
                      pcre_only = is_pcre_only(cur_rule['symbol']),
                    })
                  end
                end
              end
            end
            cur_rule['re']:set_limit(match_limit)
            cur_rule['re']:set_max_hits(1)
          end
        end
      else
        -- Maybe we know the function and can convert it
        local args =  words_to_re(words, 2)
        local func = maybe_parse_sa_function(args)

        if func then
          cur_rule['type'] = 'function'
          cur_rule['symbol'] = words[2]
          cur_rule['function'] = func
          valid_rule = true
        else
          rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
        end
      end
    elseif words[1] == "body" then
      -- body SYMBOL /regexp/
      if valid_rule then
        insert_cur_rule()
      end

      cur_rule['symbol'] = words[2]
      if words[3] and (string.sub(words[3], 1, 1) == '/'
          or string.sub(words[3], 1, 1) == 'm') then
        cur_rule['type'] = 'sabody'
        cur_rule['re_expr'] = words_to_re(words, 2)
        cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
        if cur_rule['re'] then

          rspamd_config:register_regexp({
            re = cur_rule['re'],
            type = 'sabody',
            pcre_only = is_pcre_only(cur_rule['symbol']),
          })
          valid_rule = true
          cur_rule['re']:set_limit(match_limit)
          cur_rule['re']:set_max_hits(1)
        end
      else
        -- might be function
        local args = words_to_re(words, 2)
        local func = maybe_parse_sa_function(args)

        if func then
          cur_rule['type'] = 'function'
          cur_rule['symbol'] = words[2]
          cur_rule['function'] = func
          valid_rule = true
        else
          rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
        end
      end
    elseif words[1] == "rawbody" then
      -- body SYMBOL /regexp/
      if valid_rule then
        insert_cur_rule()
      end

      cur_rule['symbol'] = words[2]
      if words[3] and (string.sub(words[3], 1, 1) == '/'
          or string.sub(words[3], 1, 1) == 'm') then
        cur_rule['type'] = 'sarawbody'
        cur_rule['re_expr'] = words_to_re(words, 2)
        cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
        if cur_rule['re'] then

          rspamd_config:register_regexp({
            re = cur_rule['re'],
            type = 'sarawbody',
            pcre_only = is_pcre_only(cur_rule['symbol']),
          })
          valid_rule = true
          cur_rule['re']:set_limit(match_limit)
          cur_rule['re']:set_max_hits(1)
        end
      else
        -- might be function
        local args = words_to_re(words, 2)
        local func = maybe_parse_sa_function(args)

        if func then
          cur_rule['type'] = 'function'
          cur_rule['symbol'] = words[2]
          cur_rule['function'] = func
          valid_rule = true
        else
          rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
        end
      end
    elseif words[1] == "full" then
      -- body SYMBOL /regexp/
      if valid_rule then
        insert_cur_rule()
      end

      cur_rule['symbol'] = words[2]

      if words[3] and (string.sub(words[3], 1, 1) == '/'
          or string.sub(words[3], 1, 1) == 'm') then
        cur_rule['type'] = 'message'
        cur_rule['re_expr'] = words_to_re(words, 2)
        cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
        cur_rule['raw'] = true
        if cur_rule['re'] then
          valid_rule = true
          rspamd_config:register_regexp({
            re = cur_rule['re'],
            type = 'body',
            pcre_only = is_pcre_only(cur_rule['symbol']),
          })
          cur_rule['re']:set_limit(match_limit)
          cur_rule['re']:set_max_hits(1)
        end
      else
        -- might be function
        local args = words_to_re(words, 2)
        local func = maybe_parse_sa_function(args)

        if func then
          cur_rule['type'] = 'function'
          cur_rule['symbol'] = words[2]
          cur_rule['function'] = func
          valid_rule = true
        else
          rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
        end
      end
    elseif words[1] == "uri" then
      -- uri SYMBOL /regexp/
      if valid_rule then
        insert_cur_rule()
      end
      cur_rule['type'] = 'uri'
      cur_rule['symbol'] = words[2]
      cur_rule['re_expr'] = words_to_re(words, 2)
      cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
      if cur_rule['re'] and cur_rule['symbol'] then
        valid_rule = true
        rspamd_config:register_regexp({
          re = cur_rule['re'],
          type = 'url',
          pcre_only = is_pcre_only(cur_rule['symbol']),
        })
        cur_rule['re']:set_limit(match_limit)
        cur_rule['re']:set_max_hits(1)
      end
    elseif words[1] == "meta" then
      -- meta SYMBOL expression
      if valid_rule then
        insert_cur_rule()
      end
      cur_rule['type'] = 'meta'
      cur_rule['symbol'] = words[2]
      cur_rule['meta'] = words_to_re(words, 2)
      if cur_rule['meta'] and cur_rule['symbol']
        and cur_rule['meta'] ~= '0' then
          valid_rule = true
      end
    elseif words[1] == "describe" and valid_rule then
      cur_rule['description'] = words_to_re(words, 2)
    elseif words[1] == "score" then
      scores[words[2]] = parse_score(words)
    elseif words[1] == 'freemail_domains' then
      fun.each(function(dom)
          table.insert(freemail_domains, '@' .. dom)
        end, fun.drop_n(1, words))
    elseif words[1] == 'blacklist_from' then
      sa_lists['from_blacklist'][words[2]] = 1
      sa_lists['elts'] = sa_lists['elts'] + 1
    elseif words[1] == 'whitelist_from' then
      sa_lists['from_whitelist'][words[2]] = 1
      sa_lists['elts'] = sa_lists['elts'] + 1
    elseif words[1] == 'whitelist_to' then
      sa_lists['to_whitelist'][words[2]] = 1
      sa_lists['elts'] = sa_lists['elts'] + 1
    elseif words[1] == 'blacklist_to' then
      sa_lists['to_blacklist'][words[2]] = 1
      sa_lists['elts'] = sa_lists['elts'] + 1
    elseif words[1] == 'tflags' then
      process_tflags(cur_rule, words)
    elseif words[1] == 'replace_tag' then
      process_replace(words, replace['tags'])
    elseif words[1] == 'replace_pre' then
      process_replace(words, replace['pre'])
    elseif words[1] == 'replace_inter' then
      process_replace(words, replace['inter'])
    elseif words[1] == 'replace_post' then
      process_replace(words, replace['post'])
    elseif words[1] == 'replace_rules' then
      fun.each(function(r) table.insert(replace['rules'], r) end,
        fun.drop_n(1, words))
    end
    end)()
  end
  if valid_rule then
    insert_cur_rule()
  end
end

-- Now check all valid rules and add the according rspamd rules

local function calculate_score(sym, rule)
  if fun.all(function(c) return c == '_' end, fun.take_n(2, fun.iter(sym))) then
    return 0.0
  end

  if rule['nice'] or (rule['score'] and rule['score'] < 0.0) then
    return -1.0
  end

  return 1.0
end

local function add_sole_meta(sym, rule)
  local r = {
    type = 'meta',
    meta = rule['symbol'],
    score = rule['score'],
    description = rule['description']
  }
  rules[sym] = r
end

local function sa_regexp_match(data, re, raw, rule)
  local res = 0
  if not re then
    return 0
  end
  if rule['multiple'] then
    local lim = -1
    if rule['maxhits'] then
      lim = rule['maxhits']
    end
    res = res + re:matchn(data, lim, raw)
  else
    if re:match(data, raw) then res = 1 end
  end

  return res
end

local function apply_replacements(str)
  local pre = ""
  local post = ""
  local inter = ""

  local function check_specific_tag(prefix, s, tbl)
    local replacement = nil
    local ret = s
    fun.each(function(n, t)
      local ns,matches = string.gsub(s, string.format("<%s%s>", prefix, n), "")
      if matches > 0 then
        replacement = t
        ret = ns
      end
    end, tbl)

    return ret,replacement
  end

  local repl
  str,repl = check_specific_tag("pre ", str, replace['pre'])
  if repl then
    pre = repl
  end
  str,repl = check_specific_tag("inter ", str, replace['inter'])
  if repl then
    inter = repl
  end
  str,repl = check_specific_tag("post ", str, replace['post'])
  if repl then
    post = repl
  end

  -- XXX: ugly hack
  if inter then
    str = string.gsub(str, "><", string.format(">%s<", inter))
  end

  local function replace_all_tags(s)
    local sstr
    sstr = s
    fun.each(function(n, t)
      local rep = string.format("%s%s%s", pre, t, post)
      rep = string.gsub(rep, '%%', '%%%%')
      sstr = string.gsub(sstr, string.format("<%s>", n), rep)
    end, replace['tags'])

    return sstr
  end

  local s = replace_all_tags(str)


  if str ~= s then
    return true,s
  end

  return false,str
end

local function parse_atom(str)
  local atom = table.concat(fun.totable(fun.take_while(function(c)
    if string.find(', \t()><+!|&\n', c) then
      return false
    end
    return true
  end, fun.iter(str))), '')

  return atom
end

local function process_atom(atom, task)
  local atom_cb = atoms[atom]

  if atom_cb then
    local res = atom_cb(task)

    if not res then
      lua_util.debugm(N, task, 'atom: %1, NULL result', atom)
    elseif res > 0 then
      lua_util.debugm(N, task, 'atom: %1, result: %2', atom, res)
    end
    return res
  else
    -- This is likely external atom
    local real_sym = atom
    if symbols_replacements[atom] then
      real_sym = symbols_replacements[atom]
    end
    if task:has_symbol(real_sym) then
      lua_util.debugm(N, task, 'external atom: %1, result: 1', real_sym)
      return 1
    end
    lua_util.debugm(N, task, 'external atom: %1, result: 0', real_sym)
  end
  return 0
end

local function post_process()
  -- Replace rule tags
  local ntags = {}
  local function rec_replace_tags(tag, tagv)
    if ntags[tag] then return ntags[tag] end
    fun.each(function(n, t)
      if n ~= tag then
        local s, matches = string.gsub(tagv, string.format("<%s>", n), t)
        if matches > 0 then
          ntags[tag] = rec_replace_tags(tag, s)
        end
      end
    end, replace['tags'])

    if not ntags[tag] then ntags[tag] = tagv end
    return ntags[tag]
  end

  fun.each(function(n, t)
    rec_replace_tags(n, t)
  end, replace['tags'])
  fun.each(function(n, t)
    replace['tags'][n] = t
  end, ntags)

  fun.each(function(r)
    local rule = rules[r]

    if rule['re_expr'] and rule['re'] then
      local res, nexpr = apply_replacements(rule['re_expr'])
      if res then
        local nre = rspamd_regexp.create(nexpr)
        if not nre then
          rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %1', r)
          --rule['re'] = nil
        else
          local old_max_hits = rule['re']:get_max_hits()
          lua_util.debugm(N, rspamd_config, 'replace %1 -> %2', r, nexpr)
          rspamd_config:replace_regexp({
            old_re = rule['re'],
            new_re = nre
          })
          rule['re'] = nre
          rule['re_expr'] = nexpr
          nre:set_limit(match_limit)
          nre:set_max_hits(old_max_hits)
        end
      end
    end
  end, replace['rules'])

  fun.each(function(key, score)
    if rules[key] then
      rules[key]['score'] = score
    end
  end, scores)

  -- Header rules
  fun.each(function(k, r)
    local f = function(task)

      local raw = false
      local check = {}
      -- Cached path for ordinary expressions
      if r['ordinary'] then
        local h = r['header'][1]
        local t = 'header'

        if h['raw'] then
          t = 'rawheader'
        end

        if not r['re'] then
          rspamd_logger.errx(task, 're is missing for rule %1 (%2 header)', k,
            h['header'])
          return 0
        end

        local ret = process_regexp_opt(r.re, task, t, h.header, h.strong)

        if r['not'] then
          if ret ~= 0 then
            ret = 0
          else
            ret = 1
          end
        end

        return ret
      end

      -- Slow path
      fun.each(function(h)
        local hname = h['header']

        local hdr
        if h['mime'] then
          local parts = task:get_parts()
          for _, p in ipairs(parts) do
            local m_hdr = p:get_header_full(hname, h['strong'])

            if m_hdr then
              if not hdr then
                hdr = {}
              end
              for _, mh in ipairs(m_hdr) do
                table.insert(hdr, mh)
              end
            end
          end
        else
          hdr = task:get_header_full(hname, h['strong'])
        end

        if hdr then
          for _, rh in ipairs(hdr) do
            -- Subject for optimization
            local str
            if h['raw'] then
              str = rh['value']
              raw = true
            else
              str = rh['decoded']
            end
            if not str then return 0 end

            if h['function'] then
              str = h['function'](str)
            end

            if type(str) == 'string' then
              table.insert(check, str)
            else
              for _, c in ipairs(str) do
                table.insert(check, c)
              end
            end
          end
        elseif r['unset'] then
          table.insert(check, r['unset'])
        end
      end, r['header'])

      if #check == 0 then
        if r['not'] then return 1 end
        return 0
      end

      local ret = 0
      for _, c in ipairs(check) do
        local match = sa_regexp_match(c, r['re'], raw, r)
        if (match > 0 and not r['not']) or (match == 0 and r['not']) then
          ret = 1
        end
      end

      return ret
    end
    if r['score'] then
      local real_score = r['score'] * calculate_score(k, r)
      if math.abs(real_score) > meta_score_alpha then
        add_sole_meta(k, r)
      end
    end
    atoms[k] = f
  end,
  fun.filter(function(_, r)
      return r['type'] == 'header' and r['header']
  end,
  rules))

  -- Custom function rules
  fun.each(function(k, r)
    local f = function(task)
      local res = r['function'](task)
      if res and res > 0 then
        return res
      end
      return 0
    end
    if r['score'] then
      local real_score = r['score'] * calculate_score(k, r)
      if math.abs(real_score) > meta_score_alpha then
        add_sole_meta(k, r)
      end
    end
    atoms[k] = f
  end,
    fun.filter(function(_, r)
      return r['type'] == 'function' and r['function']
    end,
      rules))

  -- Parts rules
  fun.each(function(k, r)
    local f = function(task)
      if not r['re'] then
        rspamd_logger.errx(task, 're is missing for rule %1', k)
        return 0
      end

      local t = 'mime'
      if r['raw'] then t = 'rawmime' end

      return process_regexp_opt(r.re, task, t)
    end
    if r['score'] then
      local real_score = r['score'] * calculate_score(k, r)
      if math.abs(real_score) > meta_score_alpha then
        add_sole_meta(k, r)
      end
    end
    atoms[k] = f
  end,
  fun.filter(function(_, r)
      return r['type'] == 'part'
  end, rules))

  -- SA body rules
  fun.each(function(k, r)
    local f = function(task)
      if not r['re'] then
        rspamd_logger.errx(task, 're is missing for rule %1', k)
        return 0
      end

      local t = r['type']

      local ret = process_regexp_opt(r.re, task, t)
      return ret
    end
    if r['score'] then
      local real_score = r['score'] * calculate_score(k, r)
      if math.abs(real_score) > meta_score_alpha then
        add_sole_meta(k, r)
      end
    end
    atoms[k] = f
  end,
  fun.filter(function(_, r)
      return r['type'] == 'sabody' or r['type'] == 'message' or r['type'] == 'sarawbody'
  end, rules))

  -- URL rules
  fun.each(function(k, r)
    local f = function(task)
      if not r['re'] then
        rspamd_logger.errx(task, 're is missing for rule %1', k)
        return 0
      end

      return process_regexp_opt(r.re, task, 'url')
    end
    if r['score'] then
      local real_score = r['score'] * calculate_score(k, r)
      if math.abs(real_score) > meta_score_alpha then
        add_sole_meta(k, r)
      end
    end
    atoms[k] = f
  end,
    fun.filter(function(_, r)
      return r['type'] == 'uri'
    end,
      rules))
  -- Meta rules
  fun.each(function(k, r)
      local expression = nil
      -- Meta function callback
      local meta_cb = function(task)
        local res = 0
        local trace = {}
        -- XXX: need to memoize result for better performance
        local sym = task:has_symbol(k)
        if not sym then
          if expression then
            res,trace = expression:process_traced(task)
          end
          if res > 0 then
            -- Symbol should be one shot to make it working properly
            task:insert_result(k, res, trace)
          end
        else
          res = 1
        end

        return res
      end
      expression = rspamd_expression.create(r['meta'],
        {parse_atom, process_atom}, rspamd_config:get_mempool())
      if not expression then
        rspamd_logger.errx(rspamd_config, 'Cannot parse expression ' .. r['meta'])
      else
        if r['score'] then
          rspamd_config:set_metric_symbol({
            name = k, score = r['score'],
            description = r['description'],
            priority = 2,
            one_shot = true })
          scores_added[k] = 1
        end
        rspamd_config:register_symbol({
          name = k,
          weight = calculate_score(k, r),
          callback = meta_cb
        })
        r['expression'] = expression
        if not atoms[k] then
          atoms[k] = meta_cb
        end
      end
    end,
    fun.filter(function(_, r)
        return r['type'] == 'meta'
      end,
      rules))

  -- Check meta rules for foreign symbols and register dependencies
  -- First direct dependencies:
  fun.each(function(k, r)
      if r['expression'] then
        local expr_atoms = r['expression']:atoms()

        for _,a in ipairs(expr_atoms) do
          if not atoms[a] then
            local rspamd_symbol = replace_symbol(a)
            if not external_deps[k] then
              external_deps[k] = {}
            end

            if not external_deps[k][rspamd_symbol] then
              rspamd_config:register_dependency(k, rspamd_symbol)
              external_deps[k][rspamd_symbol] = true
              lua_util.debugm(N, rspamd_config,
                'atom %1 is a direct foreign dependency, ' ..
                'register dependency for %2 on %3',
                a, k, rspamd_symbol)
            end
          end
        end
      end
    end,
    fun.filter(function(_, r)
      return r['type'] == 'meta'
    end,
    rules))

  -- ... And then indirect ones ...
  local nchanges
  repeat
  nchanges = 0
    fun.each(function(k, r)
      if r['expression'] then
        local expr_atoms = r['expression']:atoms()
        for _,a in ipairs(expr_atoms) do
          if type(external_deps[a]) == 'table' then
            for dep in pairs(external_deps[a]) do
              if not external_deps[k] then
                external_deps[k] = {}
              end
              if not external_deps[k][dep] then
                rspamd_config:register_dependency(k, dep)
                external_deps[k][dep] = true
                lua_util.debugm(N, rspamd_config,
                  'atom %1 is an indirect foreign dependency, ' ..
                  'register dependency for %2 on %3',
                  a, k, dep)
                  nchanges = nchanges + 1
              end
            end
          else
            local rspamd_symbol, replaced_symbol = replace_symbol(a)
            if replaced_symbol then
              external_deps[a] = {[rspamd_symbol] = true}
            else
              external_deps[a] = {}
            end
          end
        end
      end
    end,
    fun.filter(function(_, r)
      return r['type'] == 'meta'
    end,
    rules))
  until nchanges == 0

  -- Set missing symbols
  fun.each(function(key, score)
    if not scores_added[key] then
      rspamd_config:set_metric_symbol({
            name = key, score = score,
            priority = 2, flags = 'ignore'})
    end
  end, scores)

  -- Logging output
  if freemail_domains then
    freemail_trie = rspamd_trie.create(freemail_domains)
    rspamd_logger.infox(rspamd_config, 'loaded %1 freemail domains definitions',
      #freemail_domains)
  end
  rspamd_logger.infox(rspamd_config, 'loaded %1 blacklist/whitelist elements',
      sa_lists['elts'])
end

local has_rules = false

if type(section) == "table" then
  for k, fn in pairs(section) do
    if k == 'alpha' and type(fn) == 'number' then
      meta_score_alpha = fn
    elseif k == 'match_limit' and type(fn) == 'number' then
      match_limit = fn
    elseif k == 'pcre_only' and type(fn) == 'table' then
      for _,s in ipairs(fn) do
        pcre_only_regexps[s] = 1
      end
    else
      if type(fn) == 'table' then
        for _, elt in ipairs(fn) do
          local files = util.glob(elt)

          for _,matched in ipairs(files) do
            local f = io.open(matched, "r")
            if f then
              process_sa_conf(f)
              has_rules = true
            else
              rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
            end
          end
        end
      else
        -- assume string
        local files = util.glob(fn)

        for _,matched in ipairs(files) do
          local f = io.open(matched, "r")
          if f then
            process_sa_conf(f)
            has_rules = true
          else
            rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
          end
        end
      end
    end
  end
end

if has_rules then
  post_process()
else
  lua_util.disable_module(N, "config")
end