diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-03-23 14:50:24 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-03-23 14:50:24 +0000 |
commit | f605d670505baad46b8ef4cdfa3dc32f48d4150e (patch) | |
tree | 04f959e423960adb60c99dc0c35cb3a10e5d8595 /src/libserver | |
parent | 7299efb5eeddde80511be8a6285d94c333fc8ea3 (diff) | |
download | rspamd-f605d670505baad46b8ef4cdfa3dc32f48d4150e.tar.gz rspamd-f605d670505baad46b8ef4cdfa3dc32f48d4150e.zip |
[Rework] URL: Another update for the URL extraction logic
URL extraction from HTML parts should work like this:
1. Extract href links.
2. Convert the HTML to plain text and extract:
   a) (http|https|ftp)://foo.bar and www.foo
   b) email-like strings matching \bfoo@bar.baz\b
For every extracted string, check that its host has a domain from the public suffix list.
Diffstat (limited to 'src/libserver')
-rw-r--r-- | src/libserver/url.c | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 0669d932f..6aceb8fa6 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -63,10 +63,10 @@ typedef struct url_match_s {
 	gchar st;
 } url_match_t;
 
-#define URL_FLAG_NOHTML (1 << 0)
-#define URL_FLAG_TLD_MATCH (1 << 1)
-#define URL_FLAG_STAR_MATCH (1 << 2)
-#define URL_FLAG_REGEXP (1 << 3)
+#define URL_FLAG_NOHTML (1u << 0u)
+#define URL_FLAG_TLD_MATCH (1u << 1u)
+#define URL_FLAG_STAR_MATCH (1u << 2u)
+#define URL_FLAG_REGEXP (1u << 3u)
 
 struct url_callback_data;
 
@@ -206,12 +206,12 @@ struct url_matcher static_matchers[] = {
 		{"sip:", "", url_web_start, url_web_end,
 				0},
 		{"www.", "http://", url_web_start, url_web_end,
-				URL_FLAG_NOHTML},
+				0},
 		{"ftp.", "ftp://", url_web_start, url_web_end,
-				URL_FLAG_NOHTML},
+				0},
 		/* Likely emails */
 		{"@", "mailto://", url_email_start, url_email_end,
-				URL_FLAG_NOHTML}
+				0}
 };