diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-04-14 15:26:19 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-04-14 15:26:19 +0100 |
commit | 91862ccc09399445debbeac90b65041c502d7f5d (patch) | |
tree | 314759e5d91846e471c0458c8364596672e07272 /src | |
parent | 4d1669e4b40e8a8e93ab652a60ed35ea64cf14da (diff) | |
download | rspamd-91862ccc09399445debbeac90b65041c502d7f5d.tar.gz rspamd-91862ccc09399445debbeac90b65041c502d7f5d.zip |
[Feature] Use more clever url matcher for hyperscan
Diffstat (limited to 'src')
-rw-r--r-- | src/libserver/url.c | 22 |
1 files changed, 19 insertions, 3 deletions
```diff
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 95542c1f0..d5bddcfb5 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -61,6 +61,7 @@ typedef struct url_match_s {
 #define URL_FLAG_NOHTML (1 << 0)
 #define URL_FLAG_TLD_MATCH (1 << 1)
 #define URL_FLAG_STAR_MATCH (1 << 2)
+#define URL_FLAG_REGEXP (1 << 3)
 
 struct url_callback_data;
 
@@ -145,8 +146,14 @@ struct url_matcher static_matchers[] = {
 		{"ftp.", "ftp://", url_web_start, url_web_end,
 				URL_FLAG_NOHTML, 0},
 		/* Likely emails */
+#ifdef WITH_HYPERSCAN
+		{"\\b[\\w._%+-]+@[\\w.-]+\\.\\p{L}{2,}\\b", "mailto://",
+				url_email_start, url_email_end,
+				URL_FLAG_NOHTML | URL_FLAG_REGEXP, 0}
+#else
 		{"@", "mailto://", url_email_start, url_email_end,
 				URL_FLAG_NOHTML, 0}
+#endif
 };
 
 struct url_callback_data {
@@ -412,9 +419,18 @@ rspamd_url_add_static_matchers (struct url_match_scanner *sc)
 	g_array_append_vals (sc->matchers, static_matchers, n);
 
 	for (i = 0; i < n; i++) {
-		rspamd_multipattern_add_pattern (url_scanner->search_trie,
-				static_matchers[i].pattern,
-				RSPAMD_MULTIPATTERN_ICASE);
+		if (static_matchers[i].flags & URL_FLAG_REGEXP) {
+			rspamd_multipattern_add_pattern (url_scanner->search_trie,
+					static_matchers[i].pattern,
+					RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8|RSPAMD_MULTIPATTERN_RE);
+		}
+		else {
+			rspamd_multipattern_add_pattern (url_scanner->search_trie,
+					static_matchers[i].pattern,
+					RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
+		}
+
+		static_matchers[i].patlen = strlen (static_matchers[i].pattern);
 	}
 }
 
```