author     Vsevolod Stakhov <vsevolod@rspamd.com>  2024-07-22 20:10:31 +0600
committer  GitHub <noreply@github.com>             2024-07-22 20:10:31 +0600
commit     747cc40faa0b49ee6371d492c5dfe083a5a5b47f
tree       c8cffc844ecd705c947f41a164891d0d296e71fc /src
parent     d94a2b8ae8842ff9525107bef3f29a814aae8fdb
parent     10b3cefd4e8dd99c73cf11c838d80ad6238a0b57
Merge pull request #5068 from rspamd/vstakhov-gpt-fixes
Some tunes and fixes for GPT
Diffstat (limited to 'src')
-rw-r--r--   src/plugins/lua/gpt.lua | 37
1 file changed, 24 insertions, 13 deletions
diff --git a/src/plugins/lua/gpt.lua b/src/plugins/lua/gpt.lua
index 6adbce3bf..61217cbf8 100644
--- a/src/plugins/lua/gpt.lua
+++ b/src/plugins/lua/gpt.lua
@@ -27,13 +27,11 @@ gpt {
   # Your key to access the API
   api_key = "xxx";
   # Model name
-  model = "gpt-3.5-turbo";
+  model = "gpt-4o-mini";
   # Maximum tokens to generate
   max_tokens = 1000;
   # Temperature for sampling
-  temperature = 0.7;
-  # Top p for sampling
-  top_p = 0.9;
+  temperature = 0.0;
   # Timeout for requests
   timeout = 10s;
   # Prompt for the model (use default if not set)
@@ -71,10 +69,9 @@ local default_symbols_to_except = {
 local settings = {
   type = 'openai',
   api_key = nil,
-  model = 'gpt-3.5-turbo',
+  model = 'gpt-4o-mini',
   max_tokens = 1000,
-  temperature = 0.7,
-  top_p = 0.9,
+  temperature = 0.0,
   timeout = 10,
   prompt = nil,
   condition = nil,
@@ -109,10 +106,12 @@ local function default_condition(task)
     if task:has_symbol(s) then
       if required_weight > 0 then
         -- Also check score
-        local sym = task:get_symbol(s)
+        local sym = task:get_symbol(s) or E
         -- Must exist as we checked it before with `has_symbol`
-        if math.abs(sym.weight) >= required_weight then
-          return false, 'skip as "' .. s .. '" is found (weight: ' .. sym.weight .. ')'
+        if sym.weight then
+          if math.abs(sym.weight) >= required_weight then
+            return false, 'skip as "' .. s .. '" is found (weight: ' .. sym.weight .. ')'
+          end
         end
         lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s,
             sym.weight, required_weight)
@@ -195,6 +194,18 @@ local function default_conversion(task, input)
   if type(reply) == 'table' and reply.probability then
     local spam_score = tonumber(reply.probability)
+
+    if not spam_score then
+      -- Maybe we need GPT to convert GPT reply here?
+      if reply.probability == "high" then
+        spam_score = 0.9
+      elseif reply.probability == "low" then
+        spam_score = 0.1
+      else
+        rspamd_logger.infox("cannot convert to spam probability: %s", reply.probability)
+      end
+    end
+
     if type(reply.usage) == 'table' then
       rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
     end

@@ -276,7 +287,7 @@ local function openai_gpt_check(task)
     model = settings.model,
     max_tokens = settings.max_tokens,
     temperature = settings.temperature,
-    top_p = settings.top_p,
+    response_format = { type = "json_object" },
     messages = {
       {
         role = 'system',
@@ -348,8 +359,8 @@ if opts then
 end

 if not settings.prompt then
-  settings.prompt = "You will be provided with the email message, " ..
-      "and your task is to classify its probability to be spam, " ..
+  settings.prompt = "You will be provided with the email message, subject, from and url domains, " ..
+      "and your task is to evaluate the probability to be spam as number from 0 to 1, " ..
       "output result as JSON with 'probability' field."
 end
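For reference, a minimal configuration matching the new defaults from this change might look like the sketch below. The local.d/gpt.conf location and the reduced option set are assumptions for illustration, taken from the example block at the top of gpt.lua rather than introduced by this diff:

# local.d/gpt.conf -- assumed override location, not part of this diff
gpt {
  type = "openai";
  # Your key to access the API
  api_key = "xxx";
  # Model name (new default)
  model = "gpt-4o-mini";
  # Maximum tokens to generate
  max_tokens = 1000;
  # Temperature for sampling (new default; top_p is no longer sent)
  temperature = 0.0;
  # Timeout for requests
  timeout = 10s;
}

With temperature at 0.0 and the request now forcing response_format = { type = "json_object" }, replies are constrained to valid JSON, which is what the updated prompt ("probability ... as number from 0 to 1, output result as JSON with 'probability' field") and default_conversion expect; the new "high"/"low" fallback covers models that still answer with words instead of a number.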