aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rspamd.com> 2023-07-02 17:33:28 +0100
committer Vsevolod Stakhov <vsevolod@rspamd.com> 2023-07-02 17:33:28 +0100
commit 42873dc11090b0ce0c6f49333a73123fa5de05cb (patch)
tree a7fffa50cb21fba50ddab14df7551b09fa262f4b
parent c241116bcfed5715027826bd766ce501ccb4dfcb (diff)
download rspamd-42873dc11090b0ce0c6f49333a73123fa5de05cb.tar.gz
rspamd-42873dc11090b0ce0c6f49333a73123fa5de05cb.zip
[Minor] Lua_magic: Try to reduce fp rate for html heuristic
-rw-r--r-- lualib/lua_magic/heuristics.lua 4
1 files changed, 2 insertions, 2 deletions
diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index ea8f08e08..98cfb0eee 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -63,13 +63,13 @@ local zip_patterns = {
local txt_trie
local txt_patterns = {
html = {
- {[[(?i)<html\b]], 32},
+ {[=[(?i)<html[\s>]]=], 32},
{[[(?i)<script\b]], 20}, -- Commonly used by spammers
{[[<script\s+type="text\/javascript">]], 31}, -- Another spammy pattern
{[[(?i)<\!DOCTYPE HTML\b]], 33},
{[[(?i)<body\b]], 20},
{[[(?i)<table\b]], 20},
- {[[(?i)<a\b]], 10},
+ {[[(?i)<a\s]], 10},
{[[(?i)<p\b]], 10},
{[[(?i)<div\b]], 10},
{[[(?i)<span\b]], 10},