diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-07-02 17:33:28 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-07-02 17:33:28 +0100 |
commit | 42873dc11090b0ce0c6f49333a73123fa5de05cb (patch) | |
tree | a7fffa50cb21fba50ddab14df7551b09fa262f4b | |
parent | c241116bcfed5715027826bd766ce501ccb4dfcb (diff) | |
download | rspamd-42873dc11090b0ce0c6f49333a73123fa5de05cb.tar.gz rspamd-42873dc11090b0ce0c6f49333a73123fa5de05cb.zip |
[Minor] Lua_magic: Try to reduce fp rate for html heuristic
-rw-r--r-- | lualib/lua_magic/heuristics.lua | 4 |
1 file changed, 2 insertions, 2 deletions
```diff
diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index ea8f08e08..98cfb0eee 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -63,13 +63,13 @@ local zip_patterns = {
 local txt_trie
 local txt_patterns = {
   html = {
-    {[[(?i)<html\b]], 32},
+    {[=[(?i)<html[\s>]]=], 32},
     {[[(?i)<script\b]], 20}, -- Commonly used by spammers
     {[[<script\s+type="text\/javascript">]], 31}, -- Another spammy pattern
     {[[(?i)<\!DOCTYPE HTML\b]], 33},
     {[[(?i)<body\b]], 20},
     {[[(?i)<table\b]], 20},
-    {[[(?i)<a\b]], 10},
+    {[[(?i)<a\s]], 10},
     {[[(?i)<p\b]], 10},
     {[[(?i)<div\b]], 10},
     {[[(?i)<span\b]], 10},
```