From 1ed9f282a568ef64372f687dba5ca25033b0ce2b Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Wed, 27 Apr 2016 16:05:15 +0100
Subject: [Fix] Fix and rescore R_PARTS_DIFFER logic

Signed-off-by: Vsevolod Stakhov
---
 rules/misc.lua | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'rules')

diff --git a/rules/misc.lua b/rules/misc.lua
index 9bd6b85f6..7bfe73551 100644
--- a/rules/misc.lua
+++ b/rules/misc.lua
@@ -33,15 +33,24 @@ reconf['R_FLASH_REDIR_IMGSHACK'] = '/^(?:http:\\/\\/)?img\\d{1,5}\\.imageshack\\
 
 -- Different text parts
 rspamd_config.R_PARTS_DIFFER = function(task)
-  local distance = task:get_mempool():get_variable('parts_distance', 'int')
+  local distance = task:get_mempool():get_variable('parts_distance', 'double')
 
   if distance then
     local nd = tonumber(distance)
-
-    if nd < 50 then
-      local score = 1 - util.tanh(nd / 100.0)
-
-      task:insert_result('R_PARTS_DIFFER', score, tostring(nd) .. '%')
+    -- ND is relation of different words to total words
+    if nd >= 0.5 then
+      local tw = task:get_mempool():get_variable('total_words', 'int')
+
+      if tw then
+        if tw > 30 then
+          -- We are confident about difference
+          local score = (nd - 0.5) * 2.0
+        else
+          -- We are not so confident about difference
+          local score = (nd - 0.5)
+        end
+        task:insert_result('R_PARTS_DIFFER', score, tostring(100.0 * nd) .. '%')
+      end
     end
   end
 
--
cgit v1.2.3