source.dussan.org Git - rspamd.git/commitdiff
[Minor] Lua_magic: Try to reduce fp rate for html heuristic
author Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 2 Jul 2023 16:33:28 +0000 (17:33 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 2 Jul 2023 16:33:28 +0000 (17:33 +0100)
lualib/lua_magic/heuristics.lua

index ea8f08e081ae2b390aa2157ca559366d9447fb8f..98cfb0eee9ead2314228b60e9a3d91584969155f 100644 (file)
@@ -63,13 +63,13 @@ local zip_patterns = {
 local txt_trie
 local txt_patterns = {
   html = {
-    {[[(?i)<html\b]], 32},
+    {[=[(?i)<html[\s>]]=], 32},
     {[[(?i)<script\b]], 20}, -- Commonly used by spammers
     {[[<script\s+type="text\/javascript">]], 31}, -- Another spammy pattern
     {[[(?i)<\!DOCTYPE HTML\b]], 33},
     {[[(?i)<body\b]], 20},
     {[[(?i)<table\b]], 20},
-    {[[(?i)<a\b]], 10},
+    {[[(?i)<a\s]], 10},
     {[[(?i)<p\b]], 10},
     {[[(?i)<div\b]], 10},
     {[[(?i)<span\b]], 10},