summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rspamd.com>2024-07-22 20:10:31 +0600
committerGitHub <noreply@github.com>2024-07-22 20:10:31 +0600
commit747cc40faa0b49ee6371d492c5dfe083a5a5b47f (patch)
treec8cffc844ecd705c947f41a164891d0d296e71fc /src
parentd94a2b8ae8842ff9525107bef3f29a814aae8fdb (diff)
parent10b3cefd4e8dd99c73cf11c838d80ad6238a0b57 (diff)
downloadrspamd-747cc40faa0b49ee6371d492c5dfe083a5a5b47f.tar.gz
rspamd-747cc40faa0b49ee6371d492c5dfe083a5a5b47f.zip
Merge pull request #5068 from rspamd/vstakhov-gpt-fixes
Some tunes and fixes for GPT
Diffstat (limited to 'src')
-rw-r--r--src/plugins/lua/gpt.lua37
1 file changed, 24 insertions, 13 deletions
diff --git a/src/plugins/lua/gpt.lua b/src/plugins/lua/gpt.lua
index 6adbce3bf..61217cbf8 100644
--- a/src/plugins/lua/gpt.lua
+++ b/src/plugins/lua/gpt.lua
@@ -27,13 +27,11 @@ gpt {
# Your key to access the API
api_key = "xxx";
# Model name
- model = "gpt-3.5-turbo";
+ model = "gpt-4o-mini";
# Maximum tokens to generate
max_tokens = 1000;
# Temperature for sampling
- temperature = 0.7;
- # Top p for sampling
- top_p = 0.9;
+ temperature = 0.0;
# Timeout for requests
timeout = 10s;
# Prompt for the model (use default if not set)
@@ -71,10 +69,9 @@ local default_symbols_to_except = {
local settings = {
type = 'openai',
api_key = nil,
- model = 'gpt-3.5-turbo',
+ model = 'gpt-4o-mini',
max_tokens = 1000,
- temperature = 0.7,
- top_p = 0.9,
+ temperature = 0.0,
timeout = 10,
prompt = nil,
condition = nil,
@@ -109,10 +106,12 @@ local function default_condition(task)
if task:has_symbol(s) then
if required_weight > 0 then
-- Also check score
- local sym = task:get_symbol(s)
+ local sym = task:get_symbol(s) or E
-- Must exist as we checked it before with `has_symbol`
- if math.abs(sym.weight) >= required_weight then
- return false, 'skip as "' .. s .. '" is found (weight: ' .. sym.weight .. ')'
+ if sym.weight then
+ if math.abs(sym.weight) >= required_weight then
+ return false, 'skip as "' .. s .. '" is found (weight: ' .. sym.weight .. ')'
+ end
end
lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s,
sym.weight, required_weight)
@@ -195,6 +194,18 @@ local function default_conversion(task, input)
if type(reply) == 'table' and reply.probability then
local spam_score = tonumber(reply.probability)
+
+ if not spam_score then
+ -- Maybe we need GPT to convert GPT reply here?
+ if reply.probability == "high" then
+ spam_score = 0.9
+ elseif reply.probability == "low" then
+ spam_score = 0.1
+ else
+ rspamd_logger.infox("cannot convert to spam probability: %s", reply.probability)
+ end
+ end
+
if type(reply.usage) == 'table' then
rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
end
@@ -276,7 +287,7 @@ local function openai_gpt_check(task)
model = settings.model,
max_tokens = settings.max_tokens,
temperature = settings.temperature,
- top_p = settings.top_p,
+ response_format = { type = "json_object" },
messages = {
{
role = 'system',
@@ -348,8 +359,8 @@ if opts then
end
if not settings.prompt then
- settings.prompt = "You will be provided with the email message, " ..
- "and your task is to classify its probability to be spam, " ..
+ settings.prompt = "You will be provided with the email message, subject, from and url domains, " ..
+ "and your task is to evaluate the probability to be spam as number from 0 to 1, " ..
"output result as JSON with 'probability' field."
end