source.dussan.org Git - rspamd.git/commitdiff
[Minor] Lua_magic: Improve html patterns
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 25 Sep 2020 09:58:45 +0000 (10:58 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 25 Sep 2020 09:58:45 +0000 (10:58 +0100)
lualib/lua_magic/heuristics.lua

index d36b5633fea9ffe557b0f29395a20b91d1511bc0..e623c09c138661ed5aa81c7d848767593cbbb738 100644 (file)
@@ -61,15 +61,16 @@ local zip_patterns = {
 local txt_trie
 local txt_patterns = {
   html = {
-    {[[(?i)\s*<html]], 30},
-    {[[(?i)\s*<\!DOCTYPE HTML]], 30},
-    {[[(?i)\s*<xml]], 20},
-    {[[(?i)\s*<body]], 20},
-    {[[(?i)\s*<table]], 20},
-    {[[(?i)\s*<a]], 10},
-    {[[(?i)\s*<p]], 10},
-    {[[(?i)\s*<div]], 10},
-    {[[(?i)\s*<span]], 10},
+    {[[(?i)\s*<html\b]], 30},
+    {[[(?i)\s*<script\b]], 20}, -- Commonly used by spammers
+    {[[(?i)\s*<\!DOCTYPE HTML\b]], 30},
+    {[[(?i)\s*<xml\b]], 20},
+    {[[(?i)\s*<body\b]], 20},
+    {[[(?i)\s*<table\b]], 20},
+    {[[(?i)\s*<a\b]], 10},
+    {[[(?i)\s*<p\b]], 10},
+    {[[(?i)\s*<div\b]], 10},
+    {[[(?i)\s*<span\b]], 10},
   },
   csv = {
     {[[(?:[-a-zA-Z0-9_]+\s*,){2,}(?:[-a-zA-Z0-9_]+,?[ ]*[\r\n])]], 20}