summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rspamd.com>2024-07-22 20:10:31 +0600
committerGitHub <noreply@github.com>2024-07-22 20:10:31 +0600
commit747cc40faa0b49ee6371d492c5dfe083a5a5b47f (patch)
treec8cffc844ecd705c947f41a164891d0d296e71fc /src
parentd94a2b8ae8842ff9525107bef3f29a814aae8fdb (diff)
parent10b3cefd4e8dd99c73cf11c838d80ad6238a0b57 (diff)
downloadrspamd-747cc40faa0b49ee6371d492c5dfe083a5a5b47f.tar.gz
rspamd-747cc40faa0b49ee6371d492c5dfe083a5a5b47f.zip
Merge pull request #5068 from rspamd/vstakhov-gpt-fixes
Some tunes and fixes for GPT
Diffstat (limited to 'src')
-rw-r--r--src/plugins/lua/gpt.lua37
1 file changed, 24 insertions, 13 deletions
diff --git a/src/plugins/lua/gpt.lua b/src/plugins/lua/gpt.lua
index 6adbce3bf..61217cbf8 100644
--- a/src/plugins/lua/gpt.lua
+++ b/src/plugins/lua/gpt.lua
@@ -27,13 +27,11 @@ gpt {
# Your key to access the API
api_key = "xxx";
# Model name
- model = "gpt-3.5-turbo";
+ model = "gpt-4o-mini";
# Maximum tokens to generate
max_tokens = 1000;
# Temperature for sampling
- temperature = 0.7;
- # Top p for sampling
- top_p = 0.9;
+ temperature = 0.0;
# Timeout for requests
timeout = 10s;
# Prompt for the model (use default if not set)
@@ -71,10 +69,9 @@ local default_symbols_to_except = {
local settings = {
type = 'openai',
api_key = nil,
- model = 'gpt-3.5-turbo',
+ model = 'gpt-4o-mini',
max_tokens = 1000,
- temperature = 0.7,
- top_p = 0.9,
+ temperature = 0.0,
timeout = 10,
prompt = nil,
condition = nil,
@@ -109,10 +106,12 @@ local function default_condition(task)
if task:has_symbol(s) then
if required_weight > 0 then
-- Also check score
- local sym = task:get_symbol(s)
+ local sym = task:get_symbol(s) or E
-- Must exist as we checked it before with `has_symbol`
- if math.abs(sym.weight) >= required_weight then
- return false, 'skip as "' .. s .. '" is found (weight: ' .. sym.weight .. ')'
+ if sym.weight then
+ if math.abs(sym.weight) >= required_weight then
+ return false, 'skip as "' .. s .. '" is found (weight: ' .. sym.weight .. ')'
+ end
end
lua_util.debugm(N, task, 'symbol %s has weight %s, but required %s', s,
sym.weight, required_weight)
@@ -195,6 +194,18 @@ local function default_conversion(task, input)
if type(reply) == 'table' and reply.probability then
local spam_score = tonumber(reply.probability)
+
+ if not spam_score then
+ -- Maybe we need GPT to convert GPT reply here?
+ if reply.probability == "high" then
+ spam_score = 0.9
+ elseif reply.probability == "low" then
+ spam_score = 0.1
+ else
+ rspamd_logger.infox("cannot convert to spam probability: %s", reply.probability)
+ end
+ end
+
if type(reply.usage) == 'table' then
rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
end
@@ -276,7 +287,7 @@ local function openai_gpt_check(task)
model = settings.model,
max_tokens = settings.max_tokens,
temperature = settings.temperature,
- top_p = settings.top_p,
+ response_format = { type = "json_object" },
messages = {
{
role = 'system',
@@ -348,8 +359,8 @@ if opts then
end
if not settings.prompt then
- settings.prompt = "You will be provided with the email message, " ..
- "and your task is to classify its probability to be spam, " ..
+ settings.prompt = "You will be provided with the email message, subject, from and url domains, " ..
+ "and your task is to evaluate the probability to be spam as number from 0 to 1, " ..
"output result as JSON with 'probability' field."
end