aboutsummaryrefslogtreecommitdiffstats
path: root/src/libserver/url.c
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2014-07-23 12:57:31 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2014-07-23 12:57:31 +0100
commit379055dbbb4af997b4d3ffb161d447872d7ca357 (patch)
tree3774553d470f93e12ddeb454aad9b3b607cf8918 /src/libserver/url.c
parent602ae7a0b7e215ba2677131b8fdc70abc156b3ca (diff)
downloadrspamd-379055dbbb4af997b4d3ffb161d447872d7ca357.tar.gz
rspamd-379055dbbb4af997b4d3ffb161d447872d7ca357.zip
Unify style without sorting headers.
Diffstat (limited to 'src/libserver/url.c')
-rw-r--r--src/libserver/url.c1313
1 files changed, 852 insertions, 461 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c
index c4313e8a9..df4e3102d 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -38,16 +38,16 @@
#define HIGHEST_PORT 65535
#define uri_port_is_valid(port) \
- (LOWEST_PORT <= (port) && (port) <= HIGHEST_PORT)
+ (LOWEST_PORT <= (port) && (port) <= HIGHEST_PORT)
struct _proto {
- guchar *name;
- gint port;
- uintptr_t *unused;
- guint need_slashes:1;
- guint need_slash_after_host:1;
- guint free_syntax:1;
- guint need_ssl:1;
+ guchar *name;
+ gint port;
+ uintptr_t *unused;
+ guint need_slashes : 1;
+ guint need_slash_after_host : 1;
+ guint free_syntax : 1;
+ guint need_ssl : 1;
};
typedef struct url_match_s {
@@ -64,314 +64,627 @@ typedef struct url_match_s {
struct url_matcher {
const gchar *pattern;
const gchar *prefix;
- gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
- gboolean (*end)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+ gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos,
+ url_match_t *match);
+ gboolean (*end)(const gchar *begin, const gchar *end, const gchar *pos,
+ url_match_t *match);
gint flags;
};
-static gboolean url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
-static gboolean url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
-
-static gboolean url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
-static gboolean url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
-
-static gboolean url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
-static gboolean url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
-
-static gboolean url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
-static gboolean url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+static gboolean url_file_start (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match);
+static gboolean url_file_end (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_web_start (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match);
+static gboolean url_web_end (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_tld_start (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match);
+static gboolean url_tld_end (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_email_start (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match);
+static gboolean url_email_end (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match);
struct url_matcher matchers[] = {
- /* Common prefixes */
- { "file://", "", url_file_start, url_file_end, 0 },
- { "ftp://", "", url_web_start, url_web_end, 0 },
- { "sftp://", "", url_web_start, url_web_end, 0 },
- { "http://", "", url_web_start, url_web_end, 0 },
- { "https://", "", url_web_start, url_web_end, 0 },
- { "news://", "", url_web_start, url_web_end, 0 },
- { "nntp://", "", url_web_start, url_web_end, 0 },
- { "telnet://", "", url_web_start, url_web_end, 0 },
- { "webcal://", "", url_web_start, url_web_end, 0 },
- { "mailto://", "", url_email_start, url_email_end, 0 },
- { "callto://", "", url_web_start, url_web_end, 0 },
- { "h323:", "", url_web_start, url_web_end, 0 },
- { "sip:", "", url_web_start, url_web_end, 0 },
- { "www.", "http://", url_web_start, url_web_end, 0 },
- { "ftp.", "ftp://", url_web_start, url_web_end, URL_FLAG_NOHTML },
- /* TLD domains parts */
- { ".ac", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ad", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ae", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".aero", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".af", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ag", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ai", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".al", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".am", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".an", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ao", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".aq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ar", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".arpa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".as", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".asia", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".at", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".au", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".aw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ax", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".az", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ba", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".be", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".biz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".br", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".by", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".bz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ca", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cat", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ch", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ci", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ck", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".co", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".com", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".coop", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".cz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".de", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".dj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".dk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".dm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".do", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".dz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ec", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".edu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ee", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".eg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".er", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".es", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".et", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".eu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".fi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".fj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".fk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".fm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".fo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".fr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ga", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ge", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gov", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".gy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".hk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".hm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".hn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".hr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ht", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".hu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".id", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ie", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".il", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".im", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".in", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".info", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".int", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".io", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".iq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ir", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".is", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".it", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".je", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".jm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".jo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".jobs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".jp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ke", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".kg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".kh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ki", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".km", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".kn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".kp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".kr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".kw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ky", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".kz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".la", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".lb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".lc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".li", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".lk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".lr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ls", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".lt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".lu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".lv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ly", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ma", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".md", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".me", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mil", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ml", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mobi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ms", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".museum", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".my", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".mz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".na", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".name", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".nc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ne", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".net", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".nf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ng", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ni", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".nl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".no", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".np", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".nr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".nu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".nz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".om", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".org", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".pa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".pe", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".pf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".pg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ph", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".pk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".pl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".pm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".pn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".pr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".pro", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ps", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".pt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".pw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".py", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".qa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".re", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ro", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".rs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ru", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".rw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".se", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".si", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".so", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".st", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".su", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".sz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".td", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tel", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".th", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".to", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".travel", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".tz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ua", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ug", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".uk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".us", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".uy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".uz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".va", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".vc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ve", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".vg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".vi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".vn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".vu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".wf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ws", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".xxx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".ye", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".yt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".za", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".zm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- { ".zw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
- /* Likely emails */
- { "@", "mailto://",url_email_start, url_email_end, URL_FLAG_NOHTML }
+ /* Common prefixes */
+ { "file://", "", url_file_start, url_file_end,
+ 0 },
+ { "ftp://", "", url_web_start, url_web_end,
+ 0 },
+ { "sftp://", "", url_web_start, url_web_end,
+ 0 },
+ { "http://", "", url_web_start, url_web_end,
+ 0 },
+ { "https://", "", url_web_start, url_web_end,
+ 0 },
+ { "news://", "", url_web_start, url_web_end,
+ 0 },
+ { "nntp://", "", url_web_start, url_web_end,
+ 0 },
+ { "telnet://", "", url_web_start, url_web_end,
+ 0 },
+ { "webcal://", "", url_web_start, url_web_end,
+ 0 },
+ { "mailto://", "", url_email_start, url_email_end,
+ 0 },
+ { "callto://", "", url_web_start, url_web_end,
+ 0 },
+ { "h323:", "", url_web_start, url_web_end,
+ 0 },
+ { "sip:", "", url_web_start, url_web_end,
+ 0 },
+ { "www.", "http://", url_web_start, url_web_end,
+ 0 },
+ { "ftp.", "ftp://", url_web_start, url_web_end,
+ URL_FLAG_NOHTML },
+ /* TLD domains parts */
+ { ".ac", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ad", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ae", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".aero", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".af", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ag", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ai", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".al", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".am", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".an", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ao", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".aq", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ar", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".arpa", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".as", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".asia", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".at", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".au", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".aw", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ax", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".az", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ba", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bb", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bd", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".be", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bf", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bg", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bh", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bi", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".biz", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bj", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bm", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bn", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bo", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".br", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bs", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bt", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bv", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bw", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".by", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bz", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ca", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cat", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cc", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cd", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cf", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cg", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ch", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ci", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ck", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cl", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cm", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cn", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".co", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".com", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".coop", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cr", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cu", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cv", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cw", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cx", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cy", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cz", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".de", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".dj", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".dk", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".dm", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".do", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".dz", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ec", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".edu", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ee", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".eg", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".er", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".es", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".et", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".eu", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".fi", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".fj", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".fk", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".fm", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".fo", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".fr", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ga", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gb", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gd", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ge", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gf", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gg", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gh", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gi", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gl", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gm", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gn", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gov", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gp", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gq", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gr", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gs", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gt", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gu", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gw", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gy", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".hk", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".hm", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".hn", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".hr", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ht", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".hu", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".id", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ie", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".il", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".im", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".in", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".info", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".int", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".io", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".iq", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ir", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".is", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".it", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".je", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".jm", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".jo", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".jobs", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".jp", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ke", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kg", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kh", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ki", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".km", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kn", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kp", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kr", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kw", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ky", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kz", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".la", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lb", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lc", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".li", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lk", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lr", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ls", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lt", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lu", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lv", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ly", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ma", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mc", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".md", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".me", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mg", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mh", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mil", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mk", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ml", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mm", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mn", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mo", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mobi", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mp", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mq", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mr", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ms", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mt", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mu", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".museum", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mv", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mw", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mx", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".my", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mz", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".na", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".name", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".nc", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ne", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".net", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".nf", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ng", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ni", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".nl", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".no", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".np", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".nr", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".nu", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".nz", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".om", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".org", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pa", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pe", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pf", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pg", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ph", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pk", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pl", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pm", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pn", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pr", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pro", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ps", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pt", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pw", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".py", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".qa", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".re", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ro", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".rs", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ru", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".rw", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sa", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sb", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sc", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sd", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".se", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sg", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sh", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".si", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sj", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sk", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sl", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sm", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sn", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".so", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sr", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".st", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".su", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sv", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sx", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sy", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sz", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tc", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".td", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tel", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tf", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tg", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".th", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tj", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tk", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tl", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tm", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tn", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".to", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tp", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tr", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".travel", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tt", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tv", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tw", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tz", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ua", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ug", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".uk", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".us", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".uy", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".uz", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".va", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".vc", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ve", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".vg", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".vi", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".vn", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".vu", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".wf", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ws", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".xxx", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ye", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".yt", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".za", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".zm", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".zw", "http://", url_tld_start, url_tld_end,
+ URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ /* Likely emails */
+ { "@", "mailto://",url_email_start, url_email_end,
+ URL_FLAG_NOHTML }
};
struct url_match_scanner {
@@ -382,7 +695,7 @@ struct url_match_scanner {
struct url_match_scanner *url_scanner = NULL;
-static const struct _proto protocol_backends[] = {
+static const struct _proto protocol_backends[] = {
{"file", 0, NULL, 1, 0, 0, 0},
{"ftp", 21, NULL, 1, 0, 0, 0},
{"http", 80, NULL, 1, 0, 0, 0},
@@ -395,7 +708,8 @@ static const struct _proto protocol_backends[] = {
/* Convert an ASCII hex digit to the corresponding number between 0
and 15. H should be a hexadecimal digit that satisfies isxdigit;
otherwise, the result is undefined.
NOTE: H appears more than once in the expansion, so do not pass an
argument with side effects (for example *p++). */
-#define XDIGIT_TO_NUM(h) ((h) < 'A' ? (h) - '0' : g_ascii_toupper (h) - 'A' + 10)
+#define XDIGIT_TO_NUM(h) ((h) < 'A' ? (h) - '0' : g_ascii_toupper (h) - 'A' + \
+ 10)
/* Combine two hex digits into a single value: H1 supplies the high four
bits and H2 the low four bits. Both arguments are expanded through
XDIGIT_TO_NUM, so the same multiple-evaluation caveat applies. */
#define X2DIGITS_TO_NUM(h1, h2) ((XDIGIT_TO_NUM (h1) << 4) + XDIGIT_TO_NUM (h2))
/* The reverse of the above: convert a number in the [0, 16) range to
the ASCII representation of the corresponding hexadecimal digit.
@@ -404,45 +718,47 @@ static const struct _proto protocol_backends[] = {
#define XNUM_TO_digit(x) ("0123456789abcdef"[x] + 0)
/* Per-byte character classification table. Entry N is the bitwise OR of
the IS_* flags (see the enum that follows this table) that apply to the
byte value N; the is_*() macros index it with a guchar-cast character.
Every byte from 0x7f through 0xff carries only the value 1 (IS_CTRL). */
static guchar url_scanner_table[256] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 1, 1, 9, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 24,128,160,128,128,128,128,128,160,160,128,128,160,192,160,160,
- 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,160,160, 32,128, 32,128,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 1, 1, 9, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 24,128,160,128,128,128,128,128,160,160,128,128,160,192,160,160,
+ 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,160,160, 32,128, 32,128,
160, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
- 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,160,160,160,128,192,
+ 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,160,160,160,128,192,
128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
- 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,128,128,128,128, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,128,128,128,128, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
/* Bit flags stored in url_scanner_table entries. Each flag occupies its
own bit (bits 0 through 7), so one guchar entry can carry any
combination of them; the is_*() macros test these bits. */
enum {
- IS_CTRL = (1 << 0),
- IS_ALPHA = (1 << 1),
- IS_DIGIT = (1 << 2),
- IS_LWSP = (1 << 3),
- IS_SPACE = (1 << 4),
- IS_SPECIAL = (1 << 5),
- IS_DOMAIN = (1 << 6),
- IS_URLSAFE = (1 << 7)
+ IS_CTRL = (1 << 0),
+ IS_ALPHA = (1 << 1),
+ IS_DIGIT = (1 << 2),
+ IS_LWSP = (1 << 3),
+ IS_SPACE = (1 << 4),
+ IS_SPECIAL = (1 << 5),
+ IS_DOMAIN = (1 << 6),
+ IS_URLSAFE = (1 << 7)
};
/* Character-class predicates. Each one casts x to guchar, looks it up in
url_scanner_table and tests the IS_* bits of that entry:
is_atom is true when none of IS_SPECIAL, IS_SPACE or IS_CTRL is set;
is_urlsafe is true when any of IS_ALPHA, IS_DIGIT or IS_URLSAFE is set;
the remaining macros test the single flag their name suggests. */
#define is_ctrl(x) ((url_scanner_table[(guchar)(x)] & IS_CTRL) != 0)
#define is_lwsp(x) ((url_scanner_table[(guchar)(x)] & IS_LWSP) != 0)
-#define is_atom(x) ((url_scanner_table[(guchar)(x)] & (IS_SPECIAL|IS_SPACE|IS_CTRL)) == 0)
+#define is_atom(x) ((url_scanner_table[(guchar)(x)] & (IS_SPECIAL | IS_SPACE | \
+ IS_CTRL)) == 0)
#define is_alpha(x) ((url_scanner_table[(guchar)(x)] & IS_ALPHA) != 0)
#define is_digit(x) ((url_scanner_table[(guchar)(x)] & IS_DIGIT) != 0)
#define is_domain(x) ((url_scanner_table[(guchar)(x)] & IS_DOMAIN) != 0)
-#define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA|IS_DIGIT|IS_URLSAFE)) != 0)
+#define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA | IS_DIGIT | \
+ IS_URLSAFE)) != 0)
-const gchar *
+const gchar *
url_strerror (enum uri_errno err)
{
switch (err) {
@@ -477,7 +793,7 @@ url_strerror (enum uri_errno err)
/* Return the length of the leading part of NAME that contains none of
the characters in POST_CHAR_S, '#' or '?' (a strcspn() scan). NAME must
be a NUL-terminated string; the size_t result of strcspn() is returned
as a gint. */
static gint
check_uri_file (gchar *name)
{
- static const gchar chars[] = POST_CHAR_S "#?";
+ static const gchar chars[] = POST_CHAR_S "#?";
return strcspn (name, chars);
}
@@ -485,30 +801,41 @@ check_uri_file (gchar *name)
static gint
url_init (void)
{
- guint i;
- gchar patbuf[128];
+ guint i;
+ gchar patbuf[128];
if (url_scanner == NULL) {
url_scanner = g_malloc (sizeof (struct url_match_scanner));
url_scanner->matchers = matchers;
url_scanner->matchers_count = G_N_ELEMENTS (matchers);
url_scanner->patterns = rspamd_trie_create (TRUE);
- for (i = 0; i < url_scanner->matchers_count; i ++) {
+ for (i = 0; i < url_scanner->matchers_count; i++) {
if (matchers[i].flags & URL_FLAG_STRICT_MATCH) {
/* Insert more specific patterns */
/* some.tld/ */
- rspamd_snprintf (patbuf, sizeof (patbuf), "%s/", matchers[i].pattern);
+ rspamd_snprintf (patbuf,
+ sizeof (patbuf),
+ "%s/",
+ matchers[i].pattern);
rspamd_trie_insert (url_scanner->patterns, patbuf, i);
/* some.tld */
- rspamd_snprintf (patbuf, sizeof (patbuf), "%s ", matchers[i].pattern);
+ rspamd_snprintf (patbuf,
+ sizeof (patbuf),
+ "%s ",
+ matchers[i].pattern);
rspamd_trie_insert (url_scanner->patterns, patbuf, i);
/* some.tld: */
- rspamd_snprintf (patbuf, sizeof (patbuf), "%s:", matchers[i].pattern);
+ rspamd_snprintf (patbuf,
+ sizeof (patbuf),
+ "%s:",
+ matchers[i].pattern);
rspamd_trie_insert (url_scanner->patterns, patbuf, i);
}
else {
- rspamd_trie_insert (url_scanner->patterns, matchers[i].pattern, i);
+ rspamd_trie_insert (url_scanner->patterns,
+ matchers[i].pattern,
+ i);
}
}
}
@@ -522,10 +849,10 @@ get_protocol (gchar *name, gint namelen)
/* These are really enum protocol values but can take on negative
* values and since 0 <= -1 for enum values it's better to use clean
* integer type. */
- gint start, end;
- enum protocol protocol;
- guchar *pname;
- gint pnamelen, minlen, compare;
+ gint start, end;
+ enum protocol protocol;
+ guchar *pname;
+ gint pnamelen, minlen, compare;
/* Almost dichotomic search is used here */
/* Starting at the HTTP entry which is the most common that will make
@@ -590,7 +917,7 @@ get_protocol_free_syntax (enum protocol protocol)
static gint
get_protocol_length (const gchar *url)
{
- gchar *end = (gchar *)url;
+ gchar *end = (gchar *)url;
/* Seek the end of the protocol name if any. */
/* RFC1738:
@@ -610,11 +937,12 @@ get_protocol_length (const gchar *url)
static guint
url_calculate_escaped_hostlen (gchar *host, guint hostlen)
{
- guint i, result = hostlen;
- gchar *p = host, c;
+ guint i, result = hostlen;
+ gchar *p = host, c;
for (i = 0; i < hostlen; i++, p++) {
- if (*p == '%' && g_ascii_isxdigit (*(p + 1)) && g_ascii_isxdigit (*(p + 2)) && i < hostlen - 2) {
+ if (*p == '%' && g_ascii_isxdigit (*(p + 1)) &&
+ g_ascii_isxdigit (*(p + 2)) && i < hostlen - 2) {
c = X2DIGITS_TO_NUM (*(p + 1), *(p + 2));
if (c != '\0') {
result -= 2;
@@ -637,18 +965,19 @@ url_calculate_escaped_hostlen (gchar *host, guint hostlen)
static void
url_unescape (gchar *s)
{
- gchar *t = s; /* t - tortoise */
- gchar *h = s; /* h - hare */
+ gchar *t = s; /* t - tortoise */
+ gchar *h = s; /* h - hare */
for (; *h; h++, t++) {
if (*h != '%') {
- copychar:
+copychar:
*t = *h;
}
else {
- gchar c;
+ gchar c;
/* Do nothing if '%' is not followed by two hex digits. */
- if (!h[1] || !h[2] || !(g_ascii_isxdigit (h[1]) && g_ascii_isxdigit (h[2])))
+ if (!h[1] || !h[2] ||
+ !(g_ascii_isxdigit (h[1]) && g_ascii_isxdigit (h[2])))
goto copychar;
c = X2DIGITS_TO_NUM (h[1], h[2]);
/* Don't unescape %00 because there is no way to insert it
@@ -665,8 +994,8 @@ url_unescape (gchar *s)
static void
url_strip (gchar *s)
{
- gchar *t = s; /* t - tortoise */
- gchar *h = s; /* h - hare */
+ gchar *t = s; /* t - tortoise */
+ gchar *h = s; /* h - hare */
while (*h) {
if (g_ascii_isgraph (*h)) {
@@ -678,17 +1007,17 @@ url_strip (gchar *s)
*t = '\0';
}
-static gchar *
+static gchar *
url_escape_1 (const gchar *s, gint allow_passthrough, rspamd_mempool_t * pool)
{
- const gchar *p1;
- gchar *p2, *newstr;
- gint newlen;
- gint addition = 0;
+ const gchar *p1;
+ gchar *p2, *newstr;
+ gint newlen;
+ gint addition = 0;
for (p1 = s; *p1; p1++)
if (!is_urlsafe (*p1)) {
- addition += 2; /* Two more characters (hex digits) */
+ addition += 2; /* Two more characters (hex digits) */
}
if (!addition) {
@@ -708,7 +1037,7 @@ url_escape_1 (const gchar *s, gint allow_passthrough, rspamd_mempool_t * pool)
while (*p1) {
/* Quote the characters that match the test mask. */
if (!is_urlsafe (*p1)) {
- guchar c = *p1++;
+ guchar c = *p1++;
*p2++ = '%';
*p2++ = XNUM_TO_DIGIT (c >> 4);
*p2++ = XNUM_TO_DIGIT (c & 0xf);
@@ -724,7 +1053,7 @@ url_escape_1 (const gchar *s, gint allow_passthrough, rspamd_mempool_t * pool)
/* URL-escape the unsafe characters (see urlchr_table) in a given
string, returning a freshly allocated string. */
-gchar *
+gchar *
url_escape (const gchar *s, rspamd_mempool_t * pool)
{
return url_escape_1 (s, 0, pool);
@@ -747,7 +1076,7 @@ char_needs_escaping (const gchar *p)
return TRUE;
}
}
- else if (! is_urlsafe (*p)) {
+ else if (!is_urlsafe (*p)) {
return TRUE;
}
return FALSE;
@@ -755,16 +1084,16 @@ char_needs_escaping (const gchar *p)
/* Translate a %-escaped (but possibly non-conformant) input string S
into a %-escaped (and conformant) output string.
-*/
+ */
-static gchar *
+static gchar *
reencode_escapes (gchar *s, rspamd_mempool_t * pool)
{
- const gchar *p1;
- gchar *newstr, *p2;
- gint oldlen, newlen;
+ const gchar *p1;
+ gchar *newstr, *p2;
+ gint oldlen, newlen;
- gint encode_count = 0;
+ gint encode_count = 0;
/* First pass: inspect the string to see if there's anything to do,
and to calculate the new length. */
@@ -789,7 +1118,7 @@ reencode_escapes (gchar *s, rspamd_mempool_t * pool)
while (*p1)
if (char_needs_escaping (p1)) {
- guchar c = *p1++;
+ guchar c = *p1++;
*p2++ = '%';
*p2++ = XNUM_TO_DIGIT (c >> 4);
*p2++ = XNUM_TO_DIGIT (c & 0xf);
@@ -809,10 +1138,10 @@ reencode_escapes (gchar *s, rspamd_mempool_t * pool)
static void
unescape_single_char (gchar *str, gchar chr)
{
- const gchar c1 = XNUM_TO_DIGIT (chr >> 4);
- const gchar c2 = XNUM_TO_DIGIT (chr & 0xf);
- gchar *h = str; /* hare */
- gchar *t = str; /* tortoise */
+ const gchar c1 = XNUM_TO_DIGIT (chr >> 4);
+ const gchar c2 = XNUM_TO_DIGIT (chr & 0xf);
+ gchar *h = str; /* hare */
+ gchar *t = str; /* tortoise */
for (; *h; h++, t++) {
if (h[0] == '%' && h[1] == c1 && h[2] == c2) {
@@ -835,10 +1164,10 @@ unescape_single_char (gchar *str, gchar chr)
static gboolean
path_simplify (gchar *path)
{
- gchar *h = path; /* hare */
- gchar *t = path; /* tortoise */
- gchar *beg = path; /* boundary for backing the tortoise */
- gchar *end = path + strlen (path);
+ gchar *h = path; /* hare */
+ gchar *t = path; /* tortoise */
+ gchar *beg = path; /* boundary for backing the tortoise */
+ gchar *end = path + strlen (path);
while (h < end) {
/* Hare should be at the beginning of a path element. */
@@ -852,7 +1181,7 @@ path_simplify (gchar *path)
if (t > beg) {
/* Move backwards until T hits the beginning of the
previous path element or the beginning of path. */
- for (--t; t > beg && t[-1] != '/'; t--);
+ for (--t; t > beg && t[-1] != '/'; t--) ;
}
else {
/* If we're at the beginning, copy the "../" literally
@@ -864,7 +1193,7 @@ path_simplify (gchar *path)
h += 3;
}
else {
- regular:
+regular:
/* A regular path element. If H hasn't advanced past T,
simply skip to the next path element. Otherwise, copy
the path element until the next slash. */
@@ -894,10 +1223,10 @@ path_simplify (gchar *path)
enum uri_errno
parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool)
{
- guchar *prefix_end, *host_end, *p;
- guchar *lbracket, *rbracket;
- gint datalen, n, addrlen;
- guchar *frag_or_post, *user_end, *port_end;
+ guchar *prefix_end, *host_end, *p;
+ guchar *lbracket, *rbracket;
+ gint datalen, n, addrlen;
+ guchar *frag_or_post, *user_end, *port_end;
memset (uri, 0, sizeof (*uri));
@@ -910,11 +1239,13 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool)
uri->protocollen = get_protocol_length (struri (uri));
/* Assume http as default protocol */
- if (!uri->protocollen || (uri->protocol = get_protocol (struri (uri), uri->protocollen)) == PROTOCOL_UNKNOWN) {
+ if (!uri->protocollen ||
+ (uri->protocol =
+ get_protocol (struri (uri), uri->protocollen)) == PROTOCOL_UNKNOWN) {
/* Make exception for numeric urls */
p = uri->string;
while (*p && (g_ascii_isalnum (*p) || *p == ':')) {
- p ++;
+ p++;
}
if (*p == '\0') {
return URI_ERRNO_INVALID_PROTOCOL;
@@ -929,7 +1260,7 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool)
/* Figure out whether the protocol is known */
msg_debug ("getting protocol from url: %d", uri->protocol);
- prefix_end = struri (uri) + uri->protocollen; /* ':' */
+ prefix_end = struri (uri) + uri->protocollen; /* ':' */
/* Check if there's a digit after the protocol name. */
if (g_ascii_isdigit (*prefix_end)) {
@@ -1010,7 +1341,7 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool)
/* Possibly skip auth part */
host_end = prefix_end + strcspn (prefix_end, "@");
- if (prefix_end + strcspn (prefix_end, "/?") > host_end && *host_end) { /* we have auth info here */
+ if (prefix_end + strcspn (prefix_end, "/?") > host_end && *host_end) { /* we have auth info here */
/* Allow '@' in the password component */
while (strcspn (host_end + 1, "@") < strcspn (host_end + 1, "/?"))
@@ -1054,7 +1385,7 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool)
return URI_ERRNO_TRAILING_DOTS;
}
- if (*host_end == ':') { /* we have port here */
+ if (*host_end == ':') { /* we have port here */
port_end = host_end + 1 + strcspn (host_end + 1, "/");
host_end++;
@@ -1089,7 +1420,8 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool)
host_end++;
}
- else if (get_protocol_need_slash_after_host (uri->protocol) && *host_end != '?') {
+ else if (get_protocol_need_slash_after_host (uri->protocol) && *host_end !=
+ '?') {
/* The need for slash after the host component depends on the
* need for a host component. -- The dangerous mind of Jonah */
if (!uri->hostlen)
@@ -1114,7 +1446,7 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool)
}
convert_to_lowercase (uri->string, uri->protocollen);
- convert_to_lowercase (uri->host, uri->hostlen);
+ convert_to_lowercase (uri->host, uri->hostlen);
/* Decode %HH sequences in host name. This is important not so much
to support %HH sequences in host names (which other browser
don't), but to support binary characters (which will have been
@@ -1132,12 +1464,12 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool)
}
static const gchar url_braces[] = {
- '(', ')' ,
- '{', '}' ,
- '[', ']' ,
- '<', '>' ,
- '|', '|' ,
- '\'', '\''
+ '(', ')',
+ '{', '}',
+ '[', ']',
+ '<', '>',
+ '|', '|',
+ '\'', '\''
};
static gboolean
@@ -1156,22 +1488,28 @@ is_open_brace (gchar c)
}
static gboolean
-url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+url_file_start (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match)
{
match->m_begin = pos;
return TRUE;
}
static gboolean
-url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+url_file_end (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match)
{
- const gchar *p;
- gchar stop;
- guint i;
+ const gchar *p;
+ gchar stop;
+ guint i;
p = pos + strlen (match->pattern);
stop = *p;
if (*p == '/') {
- p ++;
+ p++;
}
for (i = 0; i < G_N_ELEMENTS (url_braces) / 2; i += 2) {
@@ -1182,7 +1520,7 @@ url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_
}
while (p < end && *p != stop && is_urlsafe (*p)) {
- p ++;
+ p++;
}
if (p == begin) {
@@ -1195,14 +1533,18 @@ url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_
}
static gboolean
-url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+url_tld_start (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match)
{
- const gchar *p = pos;
+ const gchar *p = pos;
/* Try to find the start of the url by finding any non-urlsafe character or whitespace/punctuation */
while (p >= begin) {
- if ((!is_domain (*p) && *p != '.' && *p != '/') || g_ascii_isspace (*p)) {
- p ++;
+ if ((!is_domain (*p) && *p != '.' &&
+ *p != '/') || g_ascii_isspace (*p)) {
+ p++;
if (!g_ascii_isalnum (*p)) {
/* Urls cannot start with strange symbols */
return FALSE;
@@ -1228,16 +1570,19 @@ url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match
/* Urls cannot contain '/' in their body */
return FALSE;
}
- p --;
+ p--;
}
return FALSE;
}
static gboolean
-url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+url_tld_end (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match)
{
- const gchar *p;
+ const gchar *p;
/* A url must be finished by tld, so it must be followed by space character */
p = pos + strlen (match->pattern);
@@ -1250,7 +1595,10 @@ url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t
p = match->m_begin;
/* Check common prefix */
if (g_ascii_strncasecmp (p, "http://", sizeof ("http://") - 1) == 0) {
- return url_web_end (begin, end, match->m_begin + sizeof ("http://") - 1, match);
+ return url_web_end (begin,
+ end,
+ match->m_begin + sizeof ("http://") - 1,
+ match);
}
else {
return url_web_end (begin, end, match->m_begin, match);
@@ -1261,10 +1609,15 @@ url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t
}
static gboolean
-url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+url_web_start (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match)
{
/* Check what we have found */
- if (pos > begin && (g_ascii_strncasecmp (pos, "www", 3) == 0 || g_ascii_strncasecmp (pos, "ftp", 3) == 0)) {
+ if (pos > begin &&
+ (g_ascii_strncasecmp (pos, "www",
+ 3) == 0 || g_ascii_strncasecmp (pos, "ftp", 3) == 0)) {
if (!is_open_brace (*(pos - 1)) && !g_ascii_isspace (*(pos - 1))) {
return FALSE;
}
@@ -1279,13 +1632,16 @@ url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match
}
static gboolean
-url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+url_web_end (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match)
{
- const gchar *p, *c;
- gchar open_brace = '\0', close_brace = '\0';
- gint brace_stack = 0;
- gboolean passwd = FALSE;
- guint port, i;
+ const gchar *p, *c;
+ gchar open_brace = '\0', close_brace = '\0';
+ gint brace_stack = 0;
+ gboolean passwd = FALSE;
+ guint port, i;
p = pos + strlen (match->pattern);
for (i = 0; i < G_N_ELEMENTS (url_braces) / 2; i += 2) {
@@ -1311,7 +1667,8 @@ url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t
p++;
}
- if ((p + 1) < end && *p == '.' && (is_atom (*(p + 1)) || *(p + 1) == '/')) {
+ if ((p + 1) < end && *p == '.' &&
+ (is_atom (*(p + 1)) || *(p + 1) == '/')) {
p++;
}
}
@@ -1338,7 +1695,9 @@ domain:
p++;
}
- if ((p + 1) < end && *p == '.' && (is_domain (*(p + 1)) || *(p + 1) == '/' || (*(p + 1) & 0x80))) {
+ if ((p + 1) < end && *p == '.' &&
+ (is_domain (*(p + 1)) || *(p + 1) == '/' ||
+ (*(p + 1) & 0x80))) {
p++;
}
}
@@ -1372,7 +1731,7 @@ domain:
}
}
else {
- passwd:
+passwd:
passwd = TRUE;
c = p;
@@ -1396,7 +1755,7 @@ domain:
break;
}
- /* we have a '/' so there could be a path - fall through */
+ /* we have a '/' so there could be a path - fall through */
case '/': /* we've detected a path component to our url */
p++;
case '?':
@@ -1434,15 +1793,18 @@ domain:
static gboolean
-url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+url_email_start (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match)
{
- const gchar *p;
+ const gchar *p;
/* Check what we have found */
if (pos > begin && *pos == '@') {
/* Try to extract it with username */
p = pos - 1;
while (p > begin && (is_domain (*p) || *p == '.' || *p == '_')) {
- p --;
+ p--;
}
if (!is_domain (*p) && p != pos - 1) {
match->m_begin = p + 1;
@@ -1464,10 +1826,13 @@ url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_mat
}
static gboolean
-url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+url_email_end (const gchar *begin,
+ const gchar *end,
+ const gchar *pos,
+ url_match_t *match)
{
- const gchar *p;
- gboolean got_at = FALSE;
+ const gchar *p;
+ gboolean got_at = FALSE;
p = pos + strlen (match->pattern);
if (*pos == '@') {
@@ -1475,12 +1840,12 @@ url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match
}
while (p < end && (is_domain (*p) || *p == '_'
- || (*p == '@' && !got_at) ||
- (*p == '.' && p + 1 < end && is_domain (*(p + 1))))) {
+ || (*p == '@' && !got_at) ||
+ (*p == '.' && p + 1 < end && is_domain (*(p + 1))))) {
if (*p == '@') {
got_at = TRUE;
}
- p ++;
+ p++;
}
match->m_len = p - match->m_begin;
match->add_prefix = TRUE;
@@ -1488,13 +1853,16 @@ url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match
}
void
-url_parse_text (rspamd_mempool_t * pool, struct rspamd_task *task, struct mime_text_part *part, gboolean is_html)
+url_parse_text (rspamd_mempool_t * pool,
+ struct rspamd_task *task,
+ struct mime_text_part *part,
+ gboolean is_html)
{
- gint rc;
- gchar *url_str = NULL, *url_start, *url_end;
- struct uri *new;
- struct process_exception *ex;
- gchar *p, *end, *begin;
+ gint rc;
+ gchar *url_str = NULL, *url_start, *url_end;
+ struct uri *new;
+ struct process_exception *ex;
+ gchar *p, *end, *begin;
if (!part->orig->data || part->orig->len == 0) {
@@ -1514,15 +1882,19 @@ url_parse_text (rspamd_mempool_t * pool, struct rspamd_task *task, struct mime_t
p = begin;
}
while (p < end) {
- if (url_try_text (pool, p, end - p, &url_start, &url_end, &url_str, is_html)) {
+ if (url_try_text (pool, p, end - p, &url_start, &url_end, &url_str,
+ is_html)) {
if (url_str != NULL) {
new = rspamd_mempool_alloc0 (pool, sizeof (struct uri));
- ex = rspamd_mempool_alloc0 (pool, sizeof (struct process_exception));
+ ex =
+ rspamd_mempool_alloc0 (pool,
+ sizeof (struct process_exception));
if (new != NULL) {
g_strstrip (url_str);
rc = parse_uri (new, url_str, pool);
- if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) &&
- new->hostlen > 0) {
+ if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES ||
+ rc == URI_ERRNO_NO_HOST_SLASH) &&
+ new->hostlen > 0) {
ex->pos = url_start - begin;
ex->len = url_end - url_start;
if (new->protocol == PROTOCOL_MAILTO) {
@@ -1537,10 +1909,14 @@ url_parse_text (rspamd_mempool_t * pool, struct rspamd_task *task, struct mime_t
g_tree_insert (task->urls, new, new);
}
}
- part->urls_offset = g_list_prepend (part->urls_offset, ex);
+ part->urls_offset = g_list_prepend (
+ part->urls_offset,
+ ex);
}
else if (rc != URI_ERRNO_OK) {
- msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc));
+ msg_info ("extract of url '%s' failed: %s",
+ url_str,
+ url_strerror (rc));
}
}
}
@@ -1554,21 +1930,30 @@ url_parse_text (rspamd_mempool_t * pool, struct rspamd_task *task, struct mime_t
/* Handle offsets of this part */
if (part->urls_offset != NULL) {
part->urls_offset = g_list_reverse (part->urls_offset);
- rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, part->urls_offset);
+ rspamd_mempool_add_destructor (task->task_pool,
+ (rspamd_mempool_destruct_t)g_list_free, part->urls_offset);
}
}
gboolean
-url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **fin, gchar **url_str, gboolean is_html)
+url_try_text (rspamd_mempool_t *pool,
+ const gchar *begin,
+ gsize len,
+ gchar **start,
+ gchar **fin,
+ gchar **url_str,
+ gboolean is_html)
{
- const gchar *end, *pos;
- gint idx, l;
- struct url_matcher *matcher;
- url_match_t m;
+ const gchar *end, *pos;
+ gint idx, l;
+ struct url_matcher *matcher;
+ url_match_t m;
end = begin + len;
if (url_init () == 0) {
- if ((pos = rspamd_trie_lookup (url_scanner->patterns, begin, len, &idx)) == NULL) {
+ if ((pos =
+ rspamd_trie_lookup (url_scanner->patterns, begin, len,
+ &idx)) == NULL) {
return FALSE;
}
else {
@@ -1580,11 +1965,17 @@ url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **sta
m.pattern = matcher->pattern;
m.prefix = matcher->prefix;
m.add_prefix = FALSE;
- if (matcher->start (begin, end, pos, &m) && matcher->end (begin, end, pos, &m)) {
+ if (matcher->start (begin, end, pos,
+ &m) && matcher->end (begin, end, pos, &m)) {
if (m.add_prefix) {
l = m.m_len + 1 + strlen (m.prefix);
*url_str = rspamd_mempool_alloc (pool, l);
- rspamd_snprintf (*url_str, l, "%s%*s", m.prefix, m.m_len, m.m_begin);
+ rspamd_snprintf (*url_str,
+ l,
+ "%s%*s",
+ m.prefix,
+ m.m_len,
+ m.m_begin);
}
else {
*url_str = rspamd_mempool_alloc (pool, m.m_len + 1);