Diffstat (limited to 'src/plugins/lua/gpt.lua')
-rw-r--r--	src/plugins/lua/gpt.lua	668
1 file changed, 519 insertions(+), 149 deletions(-)
diff --git a/src/plugins/lua/gpt.lua b/src/plugins/lua/gpt.lua
index feccae73f..331dbbce2 100644
--- a/src/plugins/lua/gpt.lua
+++ b/src/plugins/lua/gpt.lua
@@ -15,13 +15,14 @@ limitations under the License.
 ]] --
 
 local N = "gpt"
+local REDIS_PREFIX = "rsllm"
 local E = {}
 
 if confighelp then
   rspamd_config:add_example(nil, 'gpt',
-    "Performs postfiltering using GPT model",
-    [[
-gpt {
+      "Performs postfiltering using GPT model",
+      [[
+  gpt {
   # Supported types: openai, ollama
   type = "openai";
   # Your key to access the API
@@ -48,7 +49,11 @@ gpt {
   allow_passthrough = false;
   # Check messages that are apparent ham (no action and negative score)
   allow_ham = false;
-}
+  # Add header with reason (null to disable)
+  reason_header = "X-GPT-Reason";
+  # Use JSON format for response
+  json = false;
+  }
 ]])
   return
 end
@@ -57,8 +62,10 @@
 local lua_util = require "lua_util"
 local rspamd_http = require "rspamd_http"
 local rspamd_logger = require "rspamd_logger"
 local lua_mime = require "lua_mime"
+local lua_redis = require "lua_redis"
 local ucl = require "ucl"
 local fun = require "fun"
+local lua_cache = require "lua_cache"
 
 -- Exclude checks if one of those is found
@@ -71,6 +78,32 @@ local default_symbols_to_except = {
   BOUNCE = -1,
 }
 
+local default_extra_symbols = {
+  GPT_MARKETING = {
+    score = 0.0,
+    description = 'GPT model detected marketing content',
+    category = 'marketing',
+  },
+  GPT_PHISHING = {
+    score = 3.0,
+    description = 'GPT model detected phishing content',
+    category = 'phishing',
+  },
+  GPT_SCAM = {
+    score = 3.0,
+    description = 'GPT model detected scam content',
+    category = 'scam',
+  },
+  GPT_MALWARE = {
+    score = 3.0,
+    description = 'GPT model detected malware content',
+    category = 'malware',
+  },
+}
+
+-- Should be filled from extra symbols
+local categories_map = {}
+
 local settings = {
   type = 'openai',
   api_key = nil,
@@ -81,11 +114,18 @@ local settings = {
   prompt = nil,
   condition = nil,
   autolearn = false,
+  reason_header = nil,
   url = 'https://api.openai.com/v1/chat/completions',
-  symbols_to_except = default_symbols_to_except,
+  symbols_to_except = nil,
+  symbols_to_trigger = nil, -- Exclude/include logic
   allow_passthrough = false,
   allow_ham = false,
+  json = false,
+  extra_symbols = nil,
+  cache_prefix = REDIS_PREFIX,
 }
+local redis_params
+local cache_context
 
 local function default_condition(task)
   -- Check result
@@ -108,22 +148,44 @@ local function default_condition(task)
       return false, 'negative score, already decided as ham'
     end
   end
-  -- We also exclude some symbols
-  for s, required_weight in pairs(settings.symbols_to_except) do
-    if task:has_symbol(s) then
-      if required_weight > 0 then
-        -- Also check score
-        local sym = task:get_symbol(s) or E
-        -- Must exist as we checked it before with `has_symbol`
-        if sym.weight then
-          if math.abs(sym.weight) >= required_weight then
-            return false, 'skip as "' .. s .. '" is found (weight: ' .. sym.weight .. ')'
+
+  if settings.symbols_to_except then
+    for s, required_weight in pairs(settings.symbols_to_except) do
+      if task:has_symbol(s) then
+        if required_weight > 0 then
+          -- Also check score
+          local sym = task:get_symbol(s) or E
+          -- Must exist as we checked it before with `has_symbol`
+          if sym.weight then
+            if math.abs(sym.weight) >= required_weight then
+              return false, 'skip as "' .. s .. '" is found (weight: ' .. sym.weight .. ')'
+            end
           end
+          lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s,
+            sym.weight, required_weight)
+        else
+          return false, 'skip as "' .. s .. '" is found'
         end
-        lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s,
+      end
+    end
+  end
+  if settings.symbols_to_trigger then
+    for s, required_weight in pairs(settings.symbols_to_trigger) do
+      if task:has_symbol(s) then
+        if required_weight > 0 then
+          -- Also check score
+          local sym = task:get_symbol(s) or E
+          -- Must exist as we checked it before with `has_symbol`
+          if sym.weight then
+            if math.abs(sym.weight) < required_weight then
+              return false, 'skip as "' .. s .. '" is found with low weight (weight: ' .. sym.weight .. ')'
+            end
+          end
+          lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s,
             sym.weight, required_weight)
+        end
       else
-        return false, 'skip as "' .. s .. '" is found'
+        return false, 'skip as "' .. s .. '" is not found'
       end
     end
   end
@@ -147,10 +209,10 @@ local function default_condition(task)
     local words = sel_part:get_words('norm')
     nwords = #words
    if nwords > settings.max_tokens then
-      return true, table.concat(words, ' ', 1, settings.max_tokens)
+      return true, table.concat(words, ' ', 1, settings.max_tokens), sel_part
     end
   end
-  return true, sel_part:get_content_oneline()
+  return true, sel_part:get_content_oneline(), sel_part
 end
 
 local function maybe_extract_json(str)
@@ -191,7 +253,16 @@
   return nil
 end
 
-local function default_conversion(task, input)
+-- Helper function to remove <think>...</think> and trim leading newlines
+local function clean_gpt_response(text)
+  -- Remove <think>...</think> including multiline
+  text = text:gsub("<think>.-</think>", "")
+  -- Trim leading whitespace and newlines
+  text = text:gsub("^%s*\n*", "")
+  return text
+end
+
+local function default_openai_json_conversion(task, input)
   local parser = ucl.parser()
   local res, err = parser:parse_string(input)
   if not res then
@@ -239,7 +310,7 @@
     elseif reply.probability == "low" then
       spam_score = 0.1
     else
-      rspamd_logger.infox("cannot convert to spam probability: %s", reply.probability)
+      rspamd_logger.infox(task, "cannot convert to spam probability: %s", reply.probability)
     end
   end
 
@@ -247,14 +318,111 @@
     if type(reply.usage) == 'table' then
       rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
     end
 
-    return spam_score
+    return spam_score, reply.reason, {}
   end
 
   rspamd_logger.errx(task, 'cannot convert spam score: %s', first_message)
   return
 end
 
-local function ollama_conversion(task, input)
+-- Remove what we don't need
+local function clean_reply_line(line)
+  if not line then
+    return ''
+  end
+  return lua_util.str_trim(line):gsub("^%d%.%s+", "")
+end
+
+-- Assume that we have 3 lines: probability, reason, additional symbols
+local function default_openai_plain_conversion(task, input)
+  local parser = ucl.parser()
+  local res, err = parser:parse_string(input)
+  if not res then
+    rspamd_logger.errx(task, 'cannot parse reply: %s', err)
+    return
+  end
+  local reply = parser:get_object()
+  if not reply then
+    rspamd_logger.errx(task, 'cannot get object from reply')
+    return
+  end
+
+  if type(reply.choices) ~= 'table' or type(reply.choices[1]) ~= 'table' then
+    rspamd_logger.errx(task, 'no choices in reply')
+    return
+  end
+
+  local first_message = reply.choices[1].message.content
+
+  if not first_message then
+    rspamd_logger.errx(task, 'no content in the first message')
+    return
+  end
+
+  -- Clean message
+  first_message = clean_gpt_response(first_message)
+
+  local lines = lua_util.str_split(first_message, '\n')
+  local first_line = clean_reply_line(lines[1])
+  local spam_score = tonumber(first_line)
+  local reason = clean_reply_line(lines[2])
+  local categories = lua_util.str_split(clean_reply_line(lines[3]), ',')
+
+  if type(reply.usage) == 'table' then
+    rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
+  end
+
+  if spam_score then
+    return spam_score, reason, categories
+  end
+
+  rspamd_logger.errx(task, 'cannot parse plain gpt reply: %s (all: %s)', lines[1], first_message)
+  return
+end
+
+local function default_ollama_plain_conversion(task, input)
+  local parser = ucl.parser()
+  local res, err = parser:parse_string(input)
+  if not res then
+    rspamd_logger.errx(task, 'cannot parse reply: %s', err)
+    return
+  end
+  local reply = parser:get_object()
+  if not reply then
+    rspamd_logger.errx(task, 'cannot get object from reply')
+    return
+  end
+
+  if type(reply.message) ~= 'table' then
+    rspamd_logger.errx(task, 'bad message in reply')
+    return
+  end
+
+  local first_message = reply.message.content
+
+  if not first_message then
+    rspamd_logger.errx(task, 'no content in the first message')
+    return
+  end
+
+  -- Clean message
+  first_message = clean_gpt_response(first_message)
+
+  local lines = lua_util.str_split(first_message, '\n')
+  local first_line = clean_reply_line(lines[1])
+  local spam_score = tonumber(first_line)
+  local reason = clean_reply_line(lines[2])
+  local categories = lua_util.str_split(clean_reply_line(lines[3]), ',')
+
+  if spam_score then
+    return spam_score, reason, categories
+  end
+
+  rspamd_logger.errx(task, 'cannot parse plain gpt reply: %s (all: %s)', lines[1], first_message)
+  return
+end
+
+local function default_ollama_json_conversion(task, input)
   local parser = ucl.parser()
   local res, err = parser:parse_string(input)
   if not res then
@@ -302,7 +470,7 @@
     elseif reply.probability == "low" then
      spam_score = 0.1
     else
-      rspamd_logger.infox("cannot convert to spam probability: %s", reply.probability)
+      rspamd_logger.infox(task, "cannot convert to spam probability: %s", reply.probability)
     end
   end
 
@@ -310,13 +478,126 @@
     if type(reply.usage) == 'table' then
       rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
     end
 
-    return spam_score
+    return spam_score, reply.reason
   end
 
   rspamd_logger.errx(task, 'cannot convert spam score: %s', first_message)
   return
 end
 
+-- Make cache specific to all settings to avoid conflicts
+local env_digest = nil
+
+local function redis_cache_key(sel_part)
+  if not env_digest then
+    local hasher = require "rspamd_cryptobox_hash"
+    local digest = hasher.create()
+    digest:update(settings.prompt)
+    digest:update(settings.model)
+    digest:update(settings.url)
+    env_digest = digest:hex():sub(1, 4)
+  end
+  return string.format('%s_%s', env_digest,
+    sel_part:get_mimepart():get_digest():sub(1, 24))
+end
+
+local function process_categories(task, categories)
+  for _, category in ipairs(categories) do
+    local sym = categories_map[category:lower()]
+    if sym then
+      task:insert_result(sym.name, 1.0)
+    end
+  end
+end
+
+local function insert_results(task, result, sel_part)
+  if not result.probability then
+    rspamd_logger.errx(task, 'no probability in result')
+    return
+  end
+
+  if result.probability > 0.5 then
+    task:insert_result('GPT_SPAM', (result.probability - 0.5) * 2, tostring(result.probability))
+    if settings.autolearn then
+      task:set_flag("learn_spam")
+    end
+
+    if result.categories then
+      process_categories(task, result.categories)
+    end
+  else
+    task:insert_result('GPT_HAM', (0.5 - result.probability) * 2, tostring(result.probability))
+    if settings.autolearn then
+      task:set_flag("learn_ham")
+    end
+    if result.categories then
+      process_categories(task, result.categories)
+    end
+  end
+  if result.reason and settings.reason_header then
+    lua_mime.modify_headers(task,
+      { add = { [settings.reason_header] = { value = tostring(result.reason), order = 1 } } })
+  end
+
+  if cache_context then
+    lua_cache.cache_set(task, redis_cache_key(sel_part), result, cache_context)
+  end
+end
+
+local function check_consensus_and_insert_results(task, results, sel_part)
+  for _, result in ipairs(results) do
+    if not result.checked then
+      return
+    end
+  end
+
+  local nspam, nham = 0, 0
+  local max_spam_prob, max_ham_prob = 0, 0
+  local reasons = {}
+
+  for _, result in ipairs(results) do
+    if result.success then
+      if result.probability > 0.5 then
+        nspam = nspam + 1
+        max_spam_prob = math.max(max_spam_prob, result.probability)
+        lua_util.debugm(N, task, "model: %s; spam: %s; reason: '%s'",
+          result.model, result.probability, result.reason)
+      else
+        nham = nham + 1
+        max_ham_prob = math.min(max_ham_prob, result.probability)
+        lua_util.debugm(N, task, "model: %s; ham: %s; reason: '%s'",
+          result.model, result.probability, result.reason)
+      end
+
+      if result.reason then
+        table.insert(reasons, result)
+      end
+    end
+  end
+
+  lua_util.shuffle(reasons)
+  local reason = reasons[1] or nil
+
+  if nspam > nham and max_spam_prob > 0.75 then
+    insert_results(task, {
+        probability = max_spam_prob,
+        reason = reason.reason,
+        categories = reason.categories,
+      },
+      sel_part)
+  elseif nham > nspam and max_ham_prob < 0.25 then
+    insert_results(task, {
+        probability = max_ham_prob,
+        reason = reason.reason,
+        categories = reason.categories,
+      },
+      sel_part)
+  else
+    -- No consensus
+    lua_util.debugm(N, task, "no consensus")
+  end
+end
+
 local function get_meta_llm_content(task)
   local url_content = "Url domains: no urls found"
   if task:has_urls() then
@@ -334,57 +615,70 @@ local function get_meta_llm_content(task)
   return url_content, from_content
 end
 
-local function default_llm_check(task)
-  local ret, content = settings.condition(task)
+local function check_llm_uncached(task, content, sel_part)
+  return settings.specific_check(task, content, sel_part)
+end
 
-  if not ret then
-    rspamd_logger.info(task, "skip checking gpt as the condition is not met: %s", content)
-    return
-  end
+local function check_llm_cached(task, content, sel_part)
+  local cache_key = redis_cache_key(sel_part)
 
-  if not content then
-    lua_util.debugm(N, task, "no content to send to gpt classification")
-    return
-  end
+  lua_cache.cache_get(task, cache_key, cache_context, settings.timeout * 1.5, function()
+    check_llm_uncached(task, content, sel_part)
+  end, function(_, err, data)
+    if err then
+      rspamd_logger.errx(task, 'cannot get cache: %s', err)
+      check_llm_uncached(task, content, sel_part)
+    end
+    if data then
+      rspamd_logger.infox(task, 'found cached response %s', cache_key)
+      insert_results(task, data, sel_part)
+    else
+      check_llm_uncached(task, content, sel_part)
+    end
+  end)
+end
 
+local function openai_check(task, content, sel_part)
   lua_util.debugm(N, task, "sending content to gpt: %s", content)
 
   local upstream
 
-  local function on_reply(err, code, body)
+  local results = {}
 
-    if err then
-      rspamd_logger.errx(task, 'request failed: %s', err)
-      upstream:fail()
-      return
-    end
+  local function gen_reply_closure(model, idx)
+    return function(err, code, body)
+      results[idx].checked = true
+      if err then
+        rspamd_logger.errx(task, '%s: request failed: %s', model, err)
+        upstream:fail()
+        check_consensus_and_insert_results(task, results, sel_part)
+        return
+      end
 
-    upstream:ok()
-    lua_util.debugm(N, task, "got reply: %s", body)
-    if code ~= 200 then
-      rspamd_logger.errx(task, 'bad reply: %s', body)
-      return
-    end
+      upstream:ok()
+      lua_util.debugm(N, task, "%s: got reply: %s", model, body)
+      if code ~= 200 then
+        rspamd_logger.errx(task, 'bad reply: %s', body)
+        return
+      end
 
-    local reply = settings.reply_conversion(task, body)
-    if not reply then
-      return
-    end
+      local reply, reason, categories = settings.reply_conversion(task, body)
 
-    if reply > 0.75 then
-      task:insert_result('GPT_SPAM', (reply - 0.75) * 4, tostring(reply))
-      if settings.autolearn then
-        task:set_flag("learn_spam")
-      end
-    elseif reply < 0.25 then
-      task:insert_result('GPT_HAM', (0.25 - reply) * 4, tostring(reply))
-      if settings.autolearn then
-        task:set_flag("learn_ham")
+      results[idx].model = model
+
+      if reply then
+        results[idx].success = true
+        results[idx].probability = reply
+        results[idx].reason = reason
+
+        if categories then
+          results[idx].categories = categories
+        end
       end
-    else
-      lua_util.debugm(N, task, "uncertain result: %s", reply)
-    end
+      check_consensus_and_insert_results(task, results, sel_part)
+    end
   end
 
   local from_content, url_content = get_meta_llm_content(task)
@@ -393,7 +687,6 @@
     model = settings.model,
     max_tokens = settings.max_tokens,
     temperature = settings.temperature,
-    response_format = { type = "json_object" },
     messages = {
       {
        role = 'system',
@@ -401,7 +694,7 @@
       },
       {
         role = 'user',
-        content = 'Subject: ' .. task:get_subject() or '',
+        content = 'Subject: ' .. (task:get_subject() or ''),
       },
       {
         role = 'user',
@@ -418,87 +711,92 @@
     }
   }
 
-  upstream = settings.upstreams:get_upstream_round_robin()
-  local http_params = {
-    url = settings.url,
-    mime_type = 'application/json',
-    timeout = settings.timeout,
-    log_obj = task,
-    callback = on_reply,
-    headers = {
-      ['Authorization'] = 'Bearer ' .. settings.api_key,
-    },
-    keepalive = true,
-    body = ucl.to_format(body, 'json-compact', true),
-    task = task,
-    upstream = upstream,
-    use_gzip = true,
-  }
-
-  rspamd_http.request(http_params)
-end
-
-local function ollama_check(task)
-  local ret, content = settings.condition(task)
+  -- Conditionally add response_format
+  if settings.include_response_format then
+    body.response_format = { type = "json_object" }
+  end
 
-  if not ret then
-    rspamd_logger.info(task, "skip checking gpt as the condition is not met: %s", content)
-    return
+  if type(settings.model) == 'string' then
+    settings.model = { settings.model }
   end
 
-  if not content then
-    lua_util.debugm(N, task, "no content to send to gpt classification")
-    return
+  upstream = settings.upstreams:get_upstream_round_robin()
+  for idx, model in ipairs(settings.model) do
+    results[idx] = {
+      success = false,
+      checked = false
+    }
+    body.model = model
+    local http_params = {
+      url = settings.url,
+      mime_type = 'application/json',
+      timeout = settings.timeout,
+      log_obj = task,
+      callback = gen_reply_closure(model, idx),
+      headers = {
+        ['Authorization'] = 'Bearer ' .. settings.api_key,
+      },
+      keepalive = true,
+      body = ucl.to_format(body, 'json-compact', true),
+      task = task,
+      upstream = upstream,
+      use_gzip = true,
+    }
+
+    if not rspamd_http.request(http_params) then
+      results[idx].checked = true
+    end
   end
+end
 
+local function ollama_check(task, content, sel_part)
   lua_util.debugm(N, task, "sending content to gpt: %s", content)
 
   local upstream
+  local results = {}
+
+  local function gen_reply_closure(model, idx)
+    return function(err, code, body)
+      results[idx].checked = true
+      if err then
+        rspamd_logger.errx(task, '%s: request failed: %s', model, err)
+        upstream:fail()
+        check_consensus_and_insert_results(task, results, sel_part)
+        return
+      end
 
-  local function on_reply(err, code, body)
+      upstream:ok()
+      lua_util.debugm(N, task, "%s: got reply: %s", model, body)
+      if code ~= 200 then
+        rspamd_logger.errx(task, 'bad reply: %s', body)
+        return
+      end
 
-    if err then
-      rspamd_logger.errx(task, 'request failed: %s', err)
-      upstream:fail()
-      return
-    end
+      local reply, reason = settings.reply_conversion(task, body)
 
-    upstream:ok()
-    lua_util.debugm(N, task, "got reply: %s", body)
-    if code ~= 200 then
-      rspamd_logger.errx(task, 'bad reply: %s', body)
-      return
-    end
+      results[idx].model = model
 
-    local reply = settings.reply_conversion(task, body)
-    if not reply then
-      return
-    end
-
-    if reply > 0.75 then
-      task:insert_result('GPT_SPAM', (reply - 0.75) * 4, tostring(reply))
-      if settings.autolearn then
-        task:set_flag("learn_spam")
-      end
-    elseif reply < 0.25 then
-      task:insert_result('GPT_HAM', (0.25 - reply) * 4, tostring(reply))
-      if settings.autolearn then
-        task:set_flag("learn_ham")
-      end
-    else
-      lua_util.debugm(N, task, "uncertain result: %s", reply)
-    end
+      if reply then
+        results[idx].success = true
+        results[idx].probability = reply
+        results[idx].reason = reason
+      end
+      check_consensus_and_insert_results(task, results, sel_part)
+    end
   end
 
   local from_content, url_content = get_meta_llm_content(task)
 
+  if type(settings.model) == 'string' then
+    settings.model = { settings.model }
+  end
+
   local body = {
     stream = false,
     model = settings.model,
     max_tokens = settings.max_tokens,
     temperature = settings.temperature,
-    response_format = { type = "json_object" },
     messages = {
       {
         role = 'system',
@@ -523,50 +821,91 @@
     }
   }
 
-  upstream = settings.upstreams:get_upstream_round_robin()
-  local http_params = {
-    url = settings.url,
-    mime_type = 'application/json',
-    timeout = settings.timeout,
-    log_obj = task,
-    callback = on_reply,
-    keepalive = true,
-    body = ucl.to_format(body, 'json-compact', true),
-    task = task,
-    upstream = upstream,
-    use_gzip = true,
-  }
+  for i, model in ipairs(settings.model) do
+    -- Conditionally add response_format
+    if settings.include_response_format then
+      body.response_format = { type = "json_object" }
+    end
+
+    results[i] = {
+      success = false,
+      checked = false
+    }
+    body.model = model
+
+    upstream = settings.upstreams:get_upstream_round_robin()
+    local http_params = {
+      url = settings.url,
+      mime_type = 'application/json',
+      timeout = settings.timeout,
+      log_obj = task,
+      callback = gen_reply_closure(model, i),
+      keepalive = true,
+      body = ucl.to_format(body, 'json-compact', true),
+      task = task,
+      upstream = upstream,
+      use_gzip = true,
+    }
 
-  rspamd_http.request(http_params)
+    rspamd_http.request(http_params)
+  end
 end
 
 local function gpt_check(task)
-  return settings.specific_check(task)
+  local ret, content, sel_part = settings.condition(task)
+
+  if not ret then
+    rspamd_logger.info(task, "skip checking gpt as the condition is not met: %s", content)
+    return
+  end
+
+  if not content then
+    lua_util.debugm(N, task, "no content to send to gpt classification")
+    return
+  end
+
+  if sel_part then
+    -- Check digest
+    check_llm_cached(task, content, sel_part)
+  else
+    check_llm_uncached(task, content)
+  end
 end
 
 local types_map = {
   openai = {
-    check = default_llm_check,
+    check = openai_check,
     condition = default_condition,
-    conversion = default_conversion,
+    conversion = function(is_json)
+      return is_json and default_openai_json_conversion or default_openai_plain_conversion
+    end,
     require_passkey = true,
   },
   ollama = {
     check = ollama_check,
    condition = default_condition,
-    conversion = ollama_conversion,
+    conversion = function(is_json)
+      return is_json and default_ollama_json_conversion or default_ollama_plain_conversion
+    end,
     require_passkey = false,
   },
 }
 
-local opts = rspamd_config:get_all_opt('gpt')
+local opts = rspamd_config:get_all_opt(N)
 if opts then
+  redis_params = lua_redis.parse_redis_server(N, opts)
   settings = lua_util.override_defaults(settings, opts)
 
-  if not settings.prompt then
-    settings.prompt = "You will be provided with the email message, subject, from and url domains, " ..
-        "and your task is to evaluate the probability to be spam as number from 0 to 1, " ..
-        "output result as JSON with 'probability' field."
+  if redis_params then
+    cache_context = lua_cache.create_cache_context(redis_params, settings, N)
+  end
+
+  if not settings.symbols_to_except then
+    settings.symbols_to_except = default_symbols_to_except
+  end
+
+  if not settings.extra_symbols then
+    settings.extra_symbols = default_extra_symbols
   end
 
   local llm_type = types_map[settings.type]
@@ -586,7 +925,7 @@
   if settings.reply_conversion then
     settings.reply_conversion = load(settings.reply_conversion)()
   else
-    settings.reply_conversion = llm_type.conversion
+    settings.reply_conversion = llm_type.conversion(settings.json)
   end
 
   if not settings.api_key and llm_type.require_passkey then
@@ -610,7 +949,7 @@
     name = 'GPT_SPAM',
     type = 'virtual',
     parent = id,
-    score = 5.0,
+    score = 3.0,
   })
   rspamd_config:register_symbol({
     name = 'GPT_HAM',
@@ -618,4 +957,35 @@
     type = 'virtual',
     parent = id,
     score = -2.0,
   })
-end
\ No newline at end of file
+
+  if settings.extra_symbols then
+    for sym, data in pairs(settings.extra_symbols) do
+      rspamd_config:register_symbol({
+        name = sym,
+        type = 'virtual',
+        parent = id,
+        score = data.score,
+        description = data.description,
+      })
+      data.name = sym
+      categories_map[data.category] = data
+    end
+  end
+
+  if not settings.prompt then
+    if settings.extra_symbols then
+      settings.prompt = "Analyze this email strictly as a spam detector given the email message, subject, " ..
+          "FROM and url domains. Evaluate spam probability (0-1). " ..
+          "Output ONLY 3 lines:\n" ..
+          "1. Numeric score (0.00-1.00)\n" ..
+          "2. One-sentence reason citing whether it is spam, the strongest red flag, or why it is ham\n" ..
+          "3. Primary concern category if found from the list: " .. table.concat(lua_util.keys(categories_map), ', ')
+    else
+      settings.prompt = "Analyze this email strictly as a spam detector given the email message, subject, " ..
+          "FROM and url domains. Evaluate spam probability (0-1). " ..
+          "Output ONLY 2 lines:\n" ..
+          "1. Numeric score (0.00-1.00)\n" ..
+          "2. One-sentence reason citing whether it is spam, the strongest red flag, or why it is ham\n"
+    end
+  end
+end
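
For reference, a minimal sketch of a local.d/gpt.conf that exercises the options this diff introduces: consensus across several models, the plain three-line reply format, the reason header and the Redis-backed verdict cache. The option names come from the diff above; the API key and model names are placeholders, not project defaults:

  gpt {
    type = "openai";
    # Placeholder key (assumption)
    api_key = "xxx";
    url = "https://api.openai.com/v1/chat/completions";
    # A list of models enables the consensus path added here
    model = ["gpt-4o-mini", "gpt-4o"];
    # false selects the new plain three-line replies (score / reason / category);
    # true keeps the JSON 'probability' object handled by the *_json conversions
    json = false;
    # Header that carries the model's one-sentence reason
    reason_header = "X-GPT-Reason";
    allow_passthrough = false;
    allow_ham = false;
  }

With Redis configured for the module, replies are cached per message-part digest under the rsllm prefix (see redis_cache_key() and the lua_cache wiring above), so repeated scans of the same content skip the HTTP round trip.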
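To make the plain-format contract concrete: with the default prompt and extra_symbols in place, a model reply like the following (hypothetical content) is what default_openai_plain_conversion() expects to parse into a probability, a reason and a category list:

  0.85
  Urgent payment request with a mismatched sender domain and a suspicious link
  phishing

With a single model configured this already satisfies the consensus test (nspam > nham and max_spam_prob > 0.75), so insert_results() adds GPT_SPAM with dynamic weight (0.85 - 0.5) * 2 = 0.7 (scaled by the symbol score, now 3.0), stores the verdict in the cache, writes the reason into the reason_header if one is set, and fires GPT_PHISHING via categories_map. A reply whose first line is not numeric fails tonumber() and that model's result is simply excluded from the consensus.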