diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-02-20 12:57:20 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-02-20 12:57:20 +0000 |
commit | ef78035beba3a8bba8b7aa8f7341ef6c7a36f26b (patch) | |
tree | 6b5470ac8a1b47d4445ce52a41d5bfd92d3da450 | |
parent | e7cef66e2e5f84ed1de4ba2c44343ed09f4844b9 (diff) | |
download | rspamd-ef78035beba3a8bba8b7aa8f7341ef6c7a36f26b.tar.gz rspamd-ef78035beba3a8bba8b7aa8f7341ef6c7a36f26b.zip |
More fixes to urls parser.
-rw-r--r-- | src/libserver/url.c | 79 |
1 files changed, 39 insertions, 40 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c index 23620f11e..2883f8027 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -739,17 +739,18 @@ enum { IS_URLSAFE)) != 0) void -rspamd_unescape_uri (gchar **dst, gchar **src, gsize size) +rspamd_unescape_uri (gchar *dst, const gchar *src, gsize size) { - gchar *d, *s, ch, c, decoded; + gchar *d, ch, c, decoded; + const gchar *s; enum { sw_usual = 0, sw_quoted, sw_quoted_second } state; - d = *dst; - s = *src; + d = dst; + s = src; state = 0; decoded = 0; @@ -760,10 +761,6 @@ rspamd_unescape_uri (gchar **dst, gchar **src, gsize size) switch (state) { case sw_usual: - if (ch == '?') { - *d++ = ch; - goto done; - } if (ch == '%') { state = sw_quoted; @@ -811,11 +808,6 @@ rspamd_unescape_uri (gchar **dst, gchar **src, gsize size) if (c >= 'a' && c <= 'f') { ch = ((decoded << 4) + c - 'a' + 10); - if (ch == '?') { - *d++ = ch; - goto done; - } - *d++ = ch; break; } @@ -825,10 +817,7 @@ rspamd_unescape_uri (gchar **dst, gchar **src, gsize size) } } - done: - - *dst = d; - *src = s; + *d = '\0'; } const gchar * @@ -1050,11 +1039,12 @@ static gint rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, gchar const **end, gboolean strict) { - const gchar *p = str, *c = str, *last = str + len; + const gchar *p = str, *c = str, *last = str + len, *slash = NULL; gchar t; gunichar uc; glong pt; gint ret = 1; + gboolean user_seen = FALSE; enum { parse_protocol, parse_slash, @@ -1065,6 +1055,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, parse_password_start, parse_password, parse_domain, + parse_port_password, parse_port, parse_suffix_slash, parse_path, @@ -1103,15 +1094,8 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, break; case parse_slash_slash: c = p; - - /* XXX: inefficient lookahead */ - if (strchr (p, '@') != NULL) { - st = parse_user; - } - else { - st = parse_domain; - } - + st = parse_domain; + slash = p; break; case parse_user: if (t == ':') { @@ -1164,13 +1148,16 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, if (p - c == 0) { goto out; } - SET_U (u, UF_HOST); - if (t == '/') { + SET_U (u, UF_HOST); st = parse_suffix_slash; } - else { - st = parse_port; + else if (!user_seen) { + /* + * Here we can have both port and password, hence we need + * to apply some heuristic here + */ + st = parse_port_password; c = p + 1; } p ++; @@ -1196,13 +1183,31 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, } } break; + case parse_port_password: + if (g_ascii_isdigit (t)) { + /* XXX: that breaks urls with passwords starting with number */ + st = parse_port; + c = slash; + SET_U (u, UF_HOST); + c = p; + } + else { + /* Rewind back */ + p = slash; + c = slash; + user_seen = TRUE; + st = parse_user; + } + break; case parse_port: if (t == '/') { pt = strtoul (c, NULL, 10); if (pt == 0 || pt > 65535) { goto out; } - u->port = pt; + if (u != NULL) { + u->port = pt; + } st = parse_suffix_slash; } else if (!g_ascii_isdigit (t)) { @@ -1432,14 +1437,8 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len, } /* Now decode url symbols */ - uri->string = rspamd_mempool_strdup (pool, p); - - if (uri->datalen > 0) { - rspamd_unescape_uri (&uri->data, &uri->data, uri->datalen); - } - if (uri->querylen > 0) { - rspamd_unescape_uri (&uri->query, &uri->query, uri->querylen); - } + uri->string = p; + rspamd_unescape_uri (uri->string, uri->string, len); rspamd_str_lc (uri->string, uri->protocollen); rspamd_str_lc (uri->host, uri->hostlen); |