diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-07-23 12:57:31 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-07-23 12:57:31 +0100 |
commit | 379055dbbb4af997b4d3ffb161d447872d7ca357 (patch) | |
tree | 3774553d470f93e12ddeb454aad9b3b607cf8918 /src/libserver/url.c | |
parent | 602ae7a0b7e215ba2677131b8fdc70abc156b3ca (diff) | |
download | rspamd-379055dbbb4af997b4d3ffb161d447872d7ca357.tar.gz rspamd-379055dbbb4af997b4d3ffb161d447872d7ca357.zip |
Unify style without sorting headers.
Diffstat (limited to 'src/libserver/url.c')
-rw-r--r-- | src/libserver/url.c | 1313 |
1 files changed, 852 insertions, 461 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c index c4313e8a9..df4e3102d 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -38,16 +38,16 @@ #define HIGHEST_PORT 65535 #define uri_port_is_valid(port) \ - (LOWEST_PORT <= (port) && (port) <= HIGHEST_PORT) + (LOWEST_PORT <= (port) && (port) <= HIGHEST_PORT) struct _proto { - guchar *name; - gint port; - uintptr_t *unused; - guint need_slashes:1; - guint need_slash_after_host:1; - guint free_syntax:1; - guint need_ssl:1; + guchar *name; + gint port; + uintptr_t *unused; + guint need_slashes : 1; + guint need_slash_after_host : 1; + guint free_syntax : 1; + guint need_ssl : 1; }; typedef struct url_match_s { @@ -64,314 +64,627 @@ typedef struct url_match_s { struct url_matcher { const gchar *pattern; const gchar *prefix; - gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); - gboolean (*end)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); + gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos, + url_match_t *match); + gboolean (*end)(const gchar *begin, const gchar *end, const gchar *pos, + url_match_t *match); gint flags; }; -static gboolean url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); -static gboolean url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); - -static gboolean url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); -static gboolean url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); - -static gboolean url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); -static gboolean url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); - -static gboolean url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); -static gboolean url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); +static gboolean url_file_start (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match); +static gboolean url_file_end (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match); + +static gboolean url_web_start (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match); +static gboolean url_web_end (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match); + +static gboolean url_tld_start (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match); +static gboolean url_tld_end (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match); + +static gboolean url_email_start (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match); +static gboolean url_email_end (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match); struct url_matcher matchers[] = { - /* Common prefixes */ - { "file://", "", url_file_start, url_file_end, 0 }, - { "ftp://", "", url_web_start, url_web_end, 0 }, - { "sftp://", "", url_web_start, url_web_end, 0 }, - { "http://", "", url_web_start, url_web_end, 0 }, - { "https://", "", url_web_start, url_web_end, 0 }, - { "news://", "", url_web_start, url_web_end, 0 }, - { "nntp://", "", url_web_start, url_web_end, 0 }, - { "telnet://", "", url_web_start, url_web_end, 0 }, - { "webcal://", "", url_web_start, url_web_end, 0 }, - { "mailto://", "", url_email_start, url_email_end, 0 }, - { "callto://", "", url_web_start, url_web_end, 0 }, - { "h323:", "", url_web_start, url_web_end, 0 }, - { "sip:", "", url_web_start, url_web_end, 0 }, - { "www.", "http://", url_web_start, url_web_end, 0 }, - { "ftp.", "ftp://", url_web_start, url_web_end, URL_FLAG_NOHTML }, - /* TLD domains parts */ - { ".ac", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ad", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ae", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".aero", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".af", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ag", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ai", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".al", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".am", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".an", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ao", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".aq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ar", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".arpa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".as", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".asia", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".at", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".au", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".aw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ax", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".az", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ba", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".be", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".biz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".br", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".by", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ca", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cat", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ch", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ci", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ck", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".co", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".com", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".coop", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".de", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".dj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".dk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".dm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".do", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".dz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ec", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".edu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ee", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".eg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".er", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".es", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".et", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".eu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".fi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".fj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".fk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".fm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".fo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".fr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ga", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ge", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gov", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".hk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".hm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".hn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".hr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ht", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".hu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".id", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ie", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".il", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".im", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".in", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".info", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".int", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".io", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".iq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ir", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".is", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".it", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".je", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".jm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".jo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".jobs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".jp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ke", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ki", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".km", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ky", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".la", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".li", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ls", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ly", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ma", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".md", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".me", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mil", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ml", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mobi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ms", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".museum", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".my", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".na", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".name", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".nc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ne", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".net", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".nf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ng", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ni", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".nl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".no", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".np", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".nr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".nu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".nz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".om", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".org", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pe", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ph", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pro", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ps", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".py", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".qa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".re", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ro", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".rs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ru", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".rw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".se", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".si", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".so", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".st", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".su", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".td", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tel", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".th", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".to", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".travel", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ua", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ug", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".uk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".us", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".uy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".uz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".va", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".vc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ve", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".vg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".vi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".vn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".vu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".wf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ws", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".xxx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ye", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".yt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".za", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".zm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".zw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - /* Likely emails */ - { "@", "mailto://",url_email_start, url_email_end, URL_FLAG_NOHTML } + /* Common prefixes */ + { "file://", "", url_file_start, url_file_end, + 0 }, + { "ftp://", "", url_web_start, url_web_end, + 0 }, + { "sftp://", "", url_web_start, url_web_end, + 0 }, + { "http://", "", url_web_start, url_web_end, + 0 }, + { "https://", "", url_web_start, url_web_end, + 0 }, + { "news://", "", url_web_start, url_web_end, + 0 }, + { "nntp://", "", url_web_start, url_web_end, + 0 }, + { "telnet://", "", url_web_start, url_web_end, + 0 }, + { "webcal://", "", url_web_start, url_web_end, + 0 }, + { "mailto://", "", url_email_start, url_email_end, + 0 }, + { "callto://", "", url_web_start, url_web_end, + 0 }, + { "h323:", "", url_web_start, url_web_end, + 0 }, + { "sip:", "", url_web_start, url_web_end, + 0 }, + { "www.", "http://", url_web_start, url_web_end, + 0 }, + { "ftp.", "ftp://", url_web_start, url_web_end, + URL_FLAG_NOHTML }, + /* TLD domains parts */ + { ".ac", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ad", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ae", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".aero", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".af", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ag", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ai", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".al", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".am", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".an", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ao", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".aq", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ar", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".arpa", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".as", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".asia", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".at", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".au", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".aw", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ax", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".az", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ba", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bb", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bd", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".be", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bf", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bg", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bh", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bi", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".biz", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bj", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bm", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bn", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bo", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".br", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bs", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bt", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bv", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bw", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".by", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bz", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ca", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cat", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cc", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cd", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cf", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cg", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ch", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ci", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ck", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cl", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cm", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cn", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".co", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".com", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".coop", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cr", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cu", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cv", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cw", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cx", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cy", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cz", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".de", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".dj", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".dk", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".dm", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".do", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".dz", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ec", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".edu", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ee", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".eg", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".er", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".es", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".et", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".eu", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fi", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fj", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fk", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fm", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fo", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fr", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ga", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gb", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gd", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ge", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gf", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gg", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gh", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gi", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gl", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gm", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gn", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gov", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gp", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gq", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gr", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gs", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gt", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gu", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gw", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gy", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hk", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hm", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hn", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hr", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ht", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hu", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".id", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ie", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".il", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".im", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".in", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".info", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".int", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".io", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".iq", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ir", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".is", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".it", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".je", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".jm", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".jo", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".jobs", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".jp", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ke", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kg", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kh", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ki", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".km", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kn", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kp", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kr", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kw", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ky", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kz", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".la", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lb", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lc", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".li", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lk", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lr", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ls", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lt", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lu", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lv", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ly", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ma", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mc", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".md", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".me", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mg", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mh", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mil", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mk", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ml", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mm", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mn", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mo", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mobi", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mp", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mq", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mr", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ms", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mt", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mu", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".museum", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mv", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mw", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mx", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".my", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mz", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".na", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".name", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nc", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ne", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".net", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nf", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ng", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ni", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nl", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".no", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".np", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nr", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nu", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nz", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".om", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".org", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pa", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pe", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pf", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pg", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ph", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pk", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pl", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pm", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pn", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pr", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pro", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ps", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pt", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pw", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".py", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".qa", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".re", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ro", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".rs", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ru", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".rw", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sa", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sb", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sc", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sd", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".se", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sg", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sh", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".si", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sj", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sk", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sl", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sm", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sn", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".so", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sr", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".st", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".su", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sv", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sx", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sy", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sz", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tc", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".td", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tel", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tf", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tg", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".th", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tj", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tk", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tl", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tm", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tn", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".to", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tp", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tr", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".travel", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tt", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tv", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tw", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tz", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ua", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ug", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".uk", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".us", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".uy", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".uz", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".va", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".vc", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ve", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".vg", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".vi", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".vn", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".vu", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".wf", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ws", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".xxx", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ye", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".yt", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".za", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".zm", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".zw", "http://", url_tld_start, url_tld_end, + URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + /* Likely emails */ + { "@", "mailto://",url_email_start, url_email_end, + URL_FLAG_NOHTML } }; struct url_match_scanner { @@ -382,7 +695,7 @@ struct url_match_scanner { struct url_match_scanner *url_scanner = NULL; -static const struct _proto protocol_backends[] = { +static const struct _proto protocol_backends[] = { {"file", 0, NULL, 1, 0, 0, 0}, {"ftp", 21, NULL, 1, 0, 0, 0}, {"http", 80, NULL, 1, 0, 0, 0}, @@ -395,7 +708,8 @@ static const struct _proto protocol_backends[] = { /* Convert an ASCII hex digit to the corresponding number between 0 and 15. H should be a hexadecimal digit that satisfies isxdigit; otherwise, the result is undefined. */ -#define XDIGIT_TO_NUM(h) ((h) < 'A' ? (h) - '0' : g_ascii_toupper (h) - 'A' + 10) +#define XDIGIT_TO_NUM(h) ((h) < 'A' ? (h) - '0' : g_ascii_toupper (h) - 'A' + \ + 10) #define X2DIGITS_TO_NUM(h1, h2) ((XDIGIT_TO_NUM (h1) << 4) + XDIGIT_TO_NUM (h2)) /* The reverse of the above: convert a number in the [0, 16) range to the ASCII representation of the corresponding hexadecimal digit. @@ -404,45 +718,47 @@ static const struct _proto protocol_backends[] = { #define XNUM_TO_digit(x) ("0123456789abcdef"[x] + 0) static guchar url_scanner_table[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 1, 1, 9, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 24,128,160,128,128,128,128,128,160,160,128,128,160,192,160,160, - 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,160,160, 32,128, 32,128, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 1, 1, 9, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 24,128,160,128,128,128,128,128,160,160,128,128,160,192,160,160, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,160,160, 32,128, 32,128, 160, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, - 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,160,160,160,128,192, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,160,160,160,128,192, 128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, - 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,128,128,128,128, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,128,128,128,128, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; enum { - IS_CTRL = (1 << 0), - IS_ALPHA = (1 << 1), - IS_DIGIT = (1 << 2), - IS_LWSP = (1 << 3), - IS_SPACE = (1 << 4), - IS_SPECIAL = (1 << 5), - IS_DOMAIN = (1 << 6), - IS_URLSAFE = (1 << 7) + IS_CTRL = (1 << 0), + IS_ALPHA = (1 << 1), + IS_DIGIT = (1 << 2), + IS_LWSP = (1 << 3), + IS_SPACE = (1 << 4), + IS_SPECIAL = (1 << 5), + IS_DOMAIN = (1 << 6), + IS_URLSAFE = (1 << 7) }; #define is_ctrl(x) ((url_scanner_table[(guchar)(x)] & IS_CTRL) != 0) #define is_lwsp(x) ((url_scanner_table[(guchar)(x)] & IS_LWSP) != 0) -#define is_atom(x) ((url_scanner_table[(guchar)(x)] & (IS_SPECIAL|IS_SPACE|IS_CTRL)) == 0) +#define is_atom(x) ((url_scanner_table[(guchar)(x)] & (IS_SPECIAL | IS_SPACE | \ + IS_CTRL)) == 0) #define is_alpha(x) ((url_scanner_table[(guchar)(x)] & IS_ALPHA) != 0) #define is_digit(x) ((url_scanner_table[(guchar)(x)] & IS_DIGIT) != 0) #define is_domain(x) ((url_scanner_table[(guchar)(x)] & IS_DOMAIN) != 0) -#define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA|IS_DIGIT|IS_URLSAFE)) != 0) +#define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA | IS_DIGIT | \ + IS_URLSAFE)) != 0) -const gchar * +const gchar * url_strerror (enum uri_errno err) { switch (err) { @@ -477,7 +793,7 @@ url_strerror (enum uri_errno err) static gint check_uri_file (gchar *name) { - static const gchar chars[] = POST_CHAR_S "#?"; + static const gchar chars[] = POST_CHAR_S "#?"; return strcspn (name, chars); } @@ -485,30 +801,41 @@ check_uri_file (gchar *name) static gint url_init (void) { - guint i; - gchar patbuf[128]; + guint i; + gchar patbuf[128]; if (url_scanner == NULL) { url_scanner = g_malloc (sizeof (struct url_match_scanner)); url_scanner->matchers = matchers; url_scanner->matchers_count = G_N_ELEMENTS (matchers); url_scanner->patterns = rspamd_trie_create (TRUE); - for (i = 0; i < url_scanner->matchers_count; i ++) { + for (i = 0; i < url_scanner->matchers_count; i++) { if (matchers[i].flags & URL_FLAG_STRICT_MATCH) { /* Insert more specific patterns */ /* some.tld/ */ - rspamd_snprintf (patbuf, sizeof (patbuf), "%s/", matchers[i].pattern); + rspamd_snprintf (patbuf, + sizeof (patbuf), + "%s/", + matchers[i].pattern); rspamd_trie_insert (url_scanner->patterns, patbuf, i); /* some.tld */ - rspamd_snprintf (patbuf, sizeof (patbuf), "%s ", matchers[i].pattern); + rspamd_snprintf (patbuf, + sizeof (patbuf), + "%s ", + matchers[i].pattern); rspamd_trie_insert (url_scanner->patterns, patbuf, i); /* some.tld: */ - rspamd_snprintf (patbuf, sizeof (patbuf), "%s:", matchers[i].pattern); + rspamd_snprintf (patbuf, + sizeof (patbuf), + "%s:", + matchers[i].pattern); rspamd_trie_insert (url_scanner->patterns, patbuf, i); } else { - rspamd_trie_insert (url_scanner->patterns, matchers[i].pattern, i); + rspamd_trie_insert (url_scanner->patterns, + matchers[i].pattern, + i); } } } @@ -522,10 +849,10 @@ get_protocol (gchar *name, gint namelen) /* These are really enum protocol values but can take on negative * values and since 0 <= -1 for enum values it's better to use clean * integer type. */ - gint start, end; - enum protocol protocol; - guchar *pname; - gint pnamelen, minlen, compare; + gint start, end; + enum protocol protocol; + guchar *pname; + gint pnamelen, minlen, compare; /* Almost dichotomic search is used here */ /* Starting at the HTTP entry which is the most common that will make @@ -590,7 +917,7 @@ get_protocol_free_syntax (enum protocol protocol) static gint get_protocol_length (const gchar *url) { - gchar *end = (gchar *)url; + gchar *end = (gchar *)url; /* Seek the end of the protocol name if any. */ /* RFC1738: @@ -610,11 +937,12 @@ get_protocol_length (const gchar *url) static guint url_calculate_escaped_hostlen (gchar *host, guint hostlen) { - guint i, result = hostlen; - gchar *p = host, c; + guint i, result = hostlen; + gchar *p = host, c; for (i = 0; i < hostlen; i++, p++) { - if (*p == '%' && g_ascii_isxdigit (*(p + 1)) && g_ascii_isxdigit (*(p + 2)) && i < hostlen - 2) { + if (*p == '%' && g_ascii_isxdigit (*(p + 1)) && + g_ascii_isxdigit (*(p + 2)) && i < hostlen - 2) { c = X2DIGITS_TO_NUM (*(p + 1), *(p + 2)); if (c != '\0') { result -= 2; @@ -637,18 +965,19 @@ url_calculate_escaped_hostlen (gchar *host, guint hostlen) static void url_unescape (gchar *s) { - gchar *t = s; /* t - tortoise */ - gchar *h = s; /* h - hare */ + gchar *t = s; /* t - tortoise */ + gchar *h = s; /* h - hare */ for (; *h; h++, t++) { if (*h != '%') { - copychar: +copychar: *t = *h; } else { - gchar c; + gchar c; /* Do nothing if '%' is not followed by two hex digits. */ - if (!h[1] || !h[2] || !(g_ascii_isxdigit (h[1]) && g_ascii_isxdigit (h[2]))) + if (!h[1] || !h[2] || + !(g_ascii_isxdigit (h[1]) && g_ascii_isxdigit (h[2]))) goto copychar; c = X2DIGITS_TO_NUM (h[1], h[2]); /* Don't unescape %00 because there is no way to insert it @@ -665,8 +994,8 @@ url_unescape (gchar *s) static void url_strip (gchar *s) { - gchar *t = s; /* t - tortoise */ - gchar *h = s; /* h - hare */ + gchar *t = s; /* t - tortoise */ + gchar *h = s; /* h - hare */ while (*h) { if (g_ascii_isgraph (*h)) { @@ -678,17 +1007,17 @@ url_strip (gchar *s) *t = '\0'; } -static gchar * +static gchar * url_escape_1 (const gchar *s, gint allow_passthrough, rspamd_mempool_t * pool) { - const gchar *p1; - gchar *p2, *newstr; - gint newlen; - gint addition = 0; + const gchar *p1; + gchar *p2, *newstr; + gint newlen; + gint addition = 0; for (p1 = s; *p1; p1++) if (!is_urlsafe (*p1)) { - addition += 2; /* Two more characters (hex digits) */ + addition += 2; /* Two more characters (hex digits) */ } if (!addition) { @@ -708,7 +1037,7 @@ url_escape_1 (const gchar *s, gint allow_passthrough, rspamd_mempool_t * pool) while (*p1) { /* Quote the characters that match the test mask. */ if (!is_urlsafe (*p1)) { - guchar c = *p1++; + guchar c = *p1++; *p2++ = '%'; *p2++ = XNUM_TO_DIGIT (c >> 4); *p2++ = XNUM_TO_DIGIT (c & 0xf); @@ -724,7 +1053,7 @@ url_escape_1 (const gchar *s, gint allow_passthrough, rspamd_mempool_t * pool) /* URL-escape the unsafe characters (see urlchr_table) in a given string, returning a freshly allocated string. */ -gchar * +gchar * url_escape (const gchar *s, rspamd_mempool_t * pool) { return url_escape_1 (s, 0, pool); @@ -747,7 +1076,7 @@ char_needs_escaping (const gchar *p) return TRUE; } } - else if (! is_urlsafe (*p)) { + else if (!is_urlsafe (*p)) { return TRUE; } return FALSE; @@ -755,16 +1084,16 @@ char_needs_escaping (const gchar *p) /* Translate a %-escaped (but possibly non-conformant) input string S into a %-escaped (and conformant) output string. -*/ + */ -static gchar * +static gchar * reencode_escapes (gchar *s, rspamd_mempool_t * pool) { - const gchar *p1; - gchar *newstr, *p2; - gint oldlen, newlen; + const gchar *p1; + gchar *newstr, *p2; + gint oldlen, newlen; - gint encode_count = 0; + gint encode_count = 0; /* First pass: inspect the string to see if there's anything to do, and to calculate the new length. */ @@ -789,7 +1118,7 @@ reencode_escapes (gchar *s, rspamd_mempool_t * pool) while (*p1) if (char_needs_escaping (p1)) { - guchar c = *p1++; + guchar c = *p1++; *p2++ = '%'; *p2++ = XNUM_TO_DIGIT (c >> 4); *p2++ = XNUM_TO_DIGIT (c & 0xf); @@ -809,10 +1138,10 @@ reencode_escapes (gchar *s, rspamd_mempool_t * pool) static void unescape_single_char (gchar *str, gchar chr) { - const gchar c1 = XNUM_TO_DIGIT (chr >> 4); - const gchar c2 = XNUM_TO_DIGIT (chr & 0xf); - gchar *h = str; /* hare */ - gchar *t = str; /* tortoise */ + const gchar c1 = XNUM_TO_DIGIT (chr >> 4); + const gchar c2 = XNUM_TO_DIGIT (chr & 0xf); + gchar *h = str; /* hare */ + gchar *t = str; /* tortoise */ for (; *h; h++, t++) { if (h[0] == '%' && h[1] == c1 && h[2] == c2) { @@ -835,10 +1164,10 @@ unescape_single_char (gchar *str, gchar chr) static gboolean path_simplify (gchar *path) { - gchar *h = path; /* hare */ - gchar *t = path; /* tortoise */ - gchar *beg = path; /* boundary for backing the tortoise */ - gchar *end = path + strlen (path); + gchar *h = path; /* hare */ + gchar *t = path; /* tortoise */ + gchar *beg = path; /* boundary for backing the tortoise */ + gchar *end = path + strlen (path); while (h < end) { /* Hare should be at the beginning of a path element. */ @@ -852,7 +1181,7 @@ path_simplify (gchar *path) if (t > beg) { /* Move backwards until T hits the beginning of the previous path element or the beginning of path. */ - for (--t; t > beg && t[-1] != '/'; t--); + for (--t; t > beg && t[-1] != '/'; t--) ; } else { /* If we're at the beginning, copy the "../" literally @@ -864,7 +1193,7 @@ path_simplify (gchar *path) h += 3; } else { - regular: +regular: /* A regular path element. If H hasn't advanced past T, simply skip to the next path element. Otherwise, copy the path element until the next slash. */ @@ -894,10 +1223,10 @@ path_simplify (gchar *path) enum uri_errno parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool) { - guchar *prefix_end, *host_end, *p; - guchar *lbracket, *rbracket; - gint datalen, n, addrlen; - guchar *frag_or_post, *user_end, *port_end; + guchar *prefix_end, *host_end, *p; + guchar *lbracket, *rbracket; + gint datalen, n, addrlen; + guchar *frag_or_post, *user_end, *port_end; memset (uri, 0, sizeof (*uri)); @@ -910,11 +1239,13 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool) uri->protocollen = get_protocol_length (struri (uri)); /* Assume http as default protocol */ - if (!uri->protocollen || (uri->protocol = get_protocol (struri (uri), uri->protocollen)) == PROTOCOL_UNKNOWN) { + if (!uri->protocollen || + (uri->protocol = + get_protocol (struri (uri), uri->protocollen)) == PROTOCOL_UNKNOWN) { /* Make exception for numeric urls */ p = uri->string; while (*p && (g_ascii_isalnum (*p) || *p == ':')) { - p ++; + p++; } if (*p == '\0') { return URI_ERRNO_INVALID_PROTOCOL; @@ -929,7 +1260,7 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool) /* Figure out whether the protocol is known */ msg_debug ("getting protocol from url: %d", uri->protocol); - prefix_end = struri (uri) + uri->protocollen; /* ':' */ + prefix_end = struri (uri) + uri->protocollen; /* ':' */ /* Check if there's a digit after the protocol name. */ if (g_ascii_isdigit (*prefix_end)) { @@ -1010,7 +1341,7 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool) /* Possibly skip auth part */ host_end = prefix_end + strcspn (prefix_end, "@"); - if (prefix_end + strcspn (prefix_end, "/?") > host_end && *host_end) { /* we have auth info here */ + if (prefix_end + strcspn (prefix_end, "/?") > host_end && *host_end) { /* we have auth info here */ /* Allow '@' in the password component */ while (strcspn (host_end + 1, "@") < strcspn (host_end + 1, "/?")) @@ -1054,7 +1385,7 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool) return URI_ERRNO_TRAILING_DOTS; } - if (*host_end == ':') { /* we have port here */ + if (*host_end == ':') { /* we have port here */ port_end = host_end + 1 + strcspn (host_end + 1, "/"); host_end++; @@ -1089,7 +1420,8 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool) host_end++; } - else if (get_protocol_need_slash_after_host (uri->protocol) && *host_end != '?') { + else if (get_protocol_need_slash_after_host (uri->protocol) && *host_end != + '?') { /* The need for slash after the host component depends on the * need for a host component. -- The dangerous mind of Jonah */ if (!uri->hostlen) @@ -1114,7 +1446,7 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool) } convert_to_lowercase (uri->string, uri->protocollen); - convert_to_lowercase (uri->host, uri->hostlen); + convert_to_lowercase (uri->host, uri->hostlen); /* Decode %HH sequences in host name. This is important not so much to support %HH sequences in host names (which other browser don't), but to support binary characters (which will have been @@ -1132,12 +1464,12 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool) } static const gchar url_braces[] = { - '(', ')' , - '{', '}' , - '[', ']' , - '<', '>' , - '|', '|' , - '\'', '\'' + '(', ')', + '{', '}', + '[', ']', + '<', '>', + '|', '|', + '\'', '\'' }; static gboolean @@ -1156,22 +1488,28 @@ is_open_brace (gchar c) } static gboolean -url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +url_file_start (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match) { match->m_begin = pos; return TRUE; } static gboolean -url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +url_file_end (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match) { - const gchar *p; - gchar stop; - guint i; + const gchar *p; + gchar stop; + guint i; p = pos + strlen (match->pattern); stop = *p; if (*p == '/') { - p ++; + p++; } for (i = 0; i < G_N_ELEMENTS (url_braces) / 2; i += 2) { @@ -1182,7 +1520,7 @@ url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_ } while (p < end && *p != stop && is_urlsafe (*p)) { - p ++; + p++; } if (p == begin) { @@ -1195,14 +1533,18 @@ url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_ } static gboolean -url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +url_tld_start (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match) { - const gchar *p = pos; + const gchar *p = pos; /* Try to find the start of the url by finding any non-urlsafe character or whitespace/punctuation */ while (p >= begin) { - if ((!is_domain (*p) && *p != '.' && *p != '/') || g_ascii_isspace (*p)) { - p ++; + if ((!is_domain (*p) && *p != '.' && + *p != '/') || g_ascii_isspace (*p)) { + p++; if (!g_ascii_isalnum (*p)) { /* Urls cannot start with strange symbols */ return FALSE; @@ -1228,16 +1570,19 @@ url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match /* Urls cannot contain '/' in their body */ return FALSE; } - p --; + p--; } return FALSE; } static gboolean -url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +url_tld_end (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match) { - const gchar *p; + const gchar *p; /* A url must be finished by tld, so it must be followed by space character */ p = pos + strlen (match->pattern); @@ -1250,7 +1595,10 @@ url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t p = match->m_begin; /* Check common prefix */ if (g_ascii_strncasecmp (p, "http://", sizeof ("http://") - 1) == 0) { - return url_web_end (begin, end, match->m_begin + sizeof ("http://") - 1, match); + return url_web_end (begin, + end, + match->m_begin + sizeof ("http://") - 1, + match); } else { return url_web_end (begin, end, match->m_begin, match); @@ -1261,10 +1609,15 @@ url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t } static gboolean -url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +url_web_start (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match) { /* Check what we have found */ - if (pos > begin && (g_ascii_strncasecmp (pos, "www", 3) == 0 || g_ascii_strncasecmp (pos, "ftp", 3) == 0)) { + if (pos > begin && + (g_ascii_strncasecmp (pos, "www", + 3) == 0 || g_ascii_strncasecmp (pos, "ftp", 3) == 0)) { if (!is_open_brace (*(pos - 1)) && !g_ascii_isspace (*(pos - 1))) { return FALSE; } @@ -1279,13 +1632,16 @@ url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match } static gboolean -url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +url_web_end (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match) { - const gchar *p, *c; - gchar open_brace = '\0', close_brace = '\0'; - gint brace_stack = 0; - gboolean passwd = FALSE; - guint port, i; + const gchar *p, *c; + gchar open_brace = '\0', close_brace = '\0'; + gint brace_stack = 0; + gboolean passwd = FALSE; + guint port, i; p = pos + strlen (match->pattern); for (i = 0; i < G_N_ELEMENTS (url_braces) / 2; i += 2) { @@ -1311,7 +1667,8 @@ url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t p++; } - if ((p + 1) < end && *p == '.' && (is_atom (*(p + 1)) || *(p + 1) == '/')) { + if ((p + 1) < end && *p == '.' && + (is_atom (*(p + 1)) || *(p + 1) == '/')) { p++; } } @@ -1338,7 +1695,9 @@ domain: p++; } - if ((p + 1) < end && *p == '.' && (is_domain (*(p + 1)) || *(p + 1) == '/' || (*(p + 1) & 0x80))) { + if ((p + 1) < end && *p == '.' && + (is_domain (*(p + 1)) || *(p + 1) == '/' || + (*(p + 1) & 0x80))) { p++; } } @@ -1372,7 +1731,7 @@ domain: } } else { - passwd: +passwd: passwd = TRUE; c = p; @@ -1396,7 +1755,7 @@ domain: break; } - /* we have a '/' so there could be a path - fall through */ + /* we have a '/' so there could be a path - fall through */ case '/': /* we've detected a path component to our url */ p++; case '?': @@ -1434,15 +1793,18 @@ domain: static gboolean -url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +url_email_start (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match) { - const gchar *p; + const gchar *p; /* Check what we have found */ if (pos > begin && *pos == '@') { /* Try to extract it with username */ p = pos - 1; while (p > begin && (is_domain (*p) || *p == '.' || *p == '_')) { - p --; + p--; } if (!is_domain (*p) && p != pos - 1) { match->m_begin = p + 1; @@ -1464,10 +1826,13 @@ url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_mat } static gboolean -url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +url_email_end (const gchar *begin, + const gchar *end, + const gchar *pos, + url_match_t *match) { - const gchar *p; - gboolean got_at = FALSE; + const gchar *p; + gboolean got_at = FALSE; p = pos + strlen (match->pattern); if (*pos == '@') { @@ -1475,12 +1840,12 @@ url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match } while (p < end && (is_domain (*p) || *p == '_' - || (*p == '@' && !got_at) || - (*p == '.' && p + 1 < end && is_domain (*(p + 1))))) { + || (*p == '@' && !got_at) || + (*p == '.' && p + 1 < end && is_domain (*(p + 1))))) { if (*p == '@') { got_at = TRUE; } - p ++; + p++; } match->m_len = p - match->m_begin; match->add_prefix = TRUE; @@ -1488,13 +1853,16 @@ url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match } void -url_parse_text (rspamd_mempool_t * pool, struct rspamd_task *task, struct mime_text_part *part, gboolean is_html) +url_parse_text (rspamd_mempool_t * pool, + struct rspamd_task *task, + struct mime_text_part *part, + gboolean is_html) { - gint rc; - gchar *url_str = NULL, *url_start, *url_end; - struct uri *new; - struct process_exception *ex; - gchar *p, *end, *begin; + gint rc; + gchar *url_str = NULL, *url_start, *url_end; + struct uri *new; + struct process_exception *ex; + gchar *p, *end, *begin; if (!part->orig->data || part->orig->len == 0) { @@ -1514,15 +1882,19 @@ url_parse_text (rspamd_mempool_t * pool, struct rspamd_task *task, struct mime_t p = begin; } while (p < end) { - if (url_try_text (pool, p, end - p, &url_start, &url_end, &url_str, is_html)) { + if (url_try_text (pool, p, end - p, &url_start, &url_end, &url_str, + is_html)) { if (url_str != NULL) { new = rspamd_mempool_alloc0 (pool, sizeof (struct uri)); - ex = rspamd_mempool_alloc0 (pool, sizeof (struct process_exception)); + ex = + rspamd_mempool_alloc0 (pool, + sizeof (struct process_exception)); if (new != NULL) { g_strstrip (url_str); rc = parse_uri (new, url_str, pool); - if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) && - new->hostlen > 0) { + if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || + rc == URI_ERRNO_NO_HOST_SLASH) && + new->hostlen > 0) { ex->pos = url_start - begin; ex->len = url_end - url_start; if (new->protocol == PROTOCOL_MAILTO) { @@ -1537,10 +1909,14 @@ url_parse_text (rspamd_mempool_t * pool, struct rspamd_task *task, struct mime_t g_tree_insert (task->urls, new, new); } } - part->urls_offset = g_list_prepend (part->urls_offset, ex); + part->urls_offset = g_list_prepend ( + part->urls_offset, + ex); } else if (rc != URI_ERRNO_OK) { - msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); + msg_info ("extract of url '%s' failed: %s", + url_str, + url_strerror (rc)); } } } @@ -1554,21 +1930,30 @@ url_parse_text (rspamd_mempool_t * pool, struct rspamd_task *task, struct mime_t /* Handle offsets of this part */ if (part->urls_offset != NULL) { part->urls_offset = g_list_reverse (part->urls_offset); - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, part->urls_offset); + rspamd_mempool_add_destructor (task->task_pool, + (rspamd_mempool_destruct_t)g_list_free, part->urls_offset); } } gboolean -url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **fin, gchar **url_str, gboolean is_html) +url_try_text (rspamd_mempool_t *pool, + const gchar *begin, + gsize len, + gchar **start, + gchar **fin, + gchar **url_str, + gboolean is_html) { - const gchar *end, *pos; - gint idx, l; - struct url_matcher *matcher; - url_match_t m; + const gchar *end, *pos; + gint idx, l; + struct url_matcher *matcher; + url_match_t m; end = begin + len; if (url_init () == 0) { - if ((pos = rspamd_trie_lookup (url_scanner->patterns, begin, len, &idx)) == NULL) { + if ((pos = + rspamd_trie_lookup (url_scanner->patterns, begin, len, + &idx)) == NULL) { return FALSE; } else { @@ -1580,11 +1965,17 @@ url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **sta m.pattern = matcher->pattern; m.prefix = matcher->prefix; m.add_prefix = FALSE; - if (matcher->start (begin, end, pos, &m) && matcher->end (begin, end, pos, &m)) { + if (matcher->start (begin, end, pos, + &m) && matcher->end (begin, end, pos, &m)) { if (m.add_prefix) { l = m.m_len + 1 + strlen (m.prefix); *url_str = rspamd_mempool_alloc (pool, l); - rspamd_snprintf (*url_str, l, "%s%*s", m.prefix, m.m_len, m.m_begin); + rspamd_snprintf (*url_str, + l, + "%s%*s", + m.prefix, + m.m_len, + m.m_begin); } else { *url_str = rspamd_mempool_alloc (pool, m.m_len + 1); |