aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-02-20 12:57:20 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-02-20 12:57:20 +0000
commitef78035beba3a8bba8b7aa8f7341ef6c7a36f26b (patch)
tree6b5470ac8a1b47d4445ce52a41d5bfd92d3da450
parente7cef66e2e5f84ed1de4ba2c44343ed09f4844b9 (diff)
downloadrspamd-ef78035beba3a8bba8b7aa8f7341ef6c7a36f26b.tar.gz
rspamd-ef78035beba3a8bba8b7aa8f7341ef6c7a36f26b.zip
More fixes to urls parser.
-rw-r--r--src/libserver/url.c79
1 files changed, 39 insertions, 40 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 23620f11e..2883f8027 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -739,17 +739,18 @@ enum {
IS_URLSAFE)) != 0)
void
-rspamd_unescape_uri (gchar **dst, gchar **src, gsize size)
+rspamd_unescape_uri (gchar *dst, const gchar *src, gsize size)
{
- gchar *d, *s, ch, c, decoded;
+ gchar *d, ch, c, decoded;
+ const gchar *s;
enum {
sw_usual = 0,
sw_quoted,
sw_quoted_second
} state;
- d = *dst;
- s = *src;
+ d = dst;
+ s = src;
state = 0;
decoded = 0;
@@ -760,10 +761,6 @@ rspamd_unescape_uri (gchar **dst, gchar **src, gsize size)
switch (state) {
case sw_usual:
- if (ch == '?') {
- *d++ = ch;
- goto done;
- }
if (ch == '%') {
state = sw_quoted;
@@ -811,11 +808,6 @@ rspamd_unescape_uri (gchar **dst, gchar **src, gsize size)
if (c >= 'a' && c <= 'f') {
ch = ((decoded << 4) + c - 'a' + 10);
- if (ch == '?') {
- *d++ = ch;
- goto done;
- }
-
*d++ = ch;
break;
}
@@ -825,10 +817,7 @@ rspamd_unescape_uri (gchar **dst, gchar **src, gsize size)
}
}
- done:
-
- *dst = d;
- *src = s;
+ *d = '\0';
}
const gchar *
@@ -1050,11 +1039,12 @@ static gint
rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
gchar const **end, gboolean strict)
{
- const gchar *p = str, *c = str, *last = str + len;
+ const gchar *p = str, *c = str, *last = str + len, *slash = NULL;
gchar t;
gunichar uc;
glong pt;
gint ret = 1;
+ gboolean user_seen = FALSE;
enum {
parse_protocol,
parse_slash,
@@ -1065,6 +1055,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
parse_password_start,
parse_password,
parse_domain,
+ parse_port_password,
parse_port,
parse_suffix_slash,
parse_path,
@@ -1103,15 +1094,8 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
break;
case parse_slash_slash:
c = p;
-
- /* XXX: inefficient lookahead */
- if (strchr (p, '@') != NULL) {
- st = parse_user;
- }
- else {
- st = parse_domain;
- }
-
+ st = parse_domain;
+ slash = p;
break;
case parse_user:
if (t == ':') {
@@ -1164,13 +1148,16 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
if (p - c == 0) {
goto out;
}
- SET_U (u, UF_HOST);
-
if (t == '/') {
+ SET_U (u, UF_HOST);
st = parse_suffix_slash;
}
- else {
- st = parse_port;
+ else if (!user_seen) {
+ /*
+ * Here we can have both port and password, hence we need
+ * to apply some heuristic here
+ */
+ st = parse_port_password;
c = p + 1;
}
p ++;
@@ -1196,13 +1183,31 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
}
}
break;
+ case parse_port_password:
+ if (g_ascii_isdigit (t)) {
+ /* XXX: that breaks urls with passwords starting with number */
+ st = parse_port;
+ c = slash;
+ SET_U (u, UF_HOST);
+ c = p;
+ }
+ else {
+ /* Rewind back */
+ p = slash;
+ c = slash;
+ user_seen = TRUE;
+ st = parse_user;
+ }
+ break;
case parse_port:
if (t == '/') {
pt = strtoul (c, NULL, 10);
if (pt == 0 || pt > 65535) {
goto out;
}
- u->port = pt;
+ if (u != NULL) {
+ u->port = pt;
+ }
st = parse_suffix_slash;
}
else if (!g_ascii_isdigit (t)) {
@@ -1432,14 +1437,8 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len,
}
/* Now decode url symbols */
- uri->string = rspamd_mempool_strdup (pool, p);
-
- if (uri->datalen > 0) {
- rspamd_unescape_uri (&uri->data, &uri->data, uri->datalen);
- }
- if (uri->querylen > 0) {
- rspamd_unescape_uri (&uri->query, &uri->query, uri->querylen);
- }
+ uri->string = p;
+ rspamd_unescape_uri (uri->string, uri->string, len);
rspamd_str_lc (uri->string, uri->protocollen);
rspamd_str_lc (uri->host, uri->hostlen);