Browse Source

More fixes to urls parser.

tags/0.9.0
Vsevolod Stakhov 9 years ago
parent
commit
ef78035beb
1 changed files with 39 additions and 40 deletions
  1. 39
    40
      src/libserver/url.c

+ 39
- 40
src/libserver/url.c View File

@@ -739,17 +739,18 @@ enum {
IS_URLSAFE)) != 0)

void
rspamd_unescape_uri (gchar **dst, gchar **src, gsize size)
rspamd_unescape_uri (gchar *dst, const gchar *src, gsize size)
{
gchar *d, *s, ch, c, decoded;
gchar *d, ch, c, decoded;
const gchar *s;
enum {
sw_usual = 0,
sw_quoted,
sw_quoted_second
} state;

d = *dst;
s = *src;
d = dst;
s = src;

state = 0;
decoded = 0;
@@ -760,10 +761,6 @@ rspamd_unescape_uri (gchar **dst, gchar **src, gsize size)

switch (state) {
case sw_usual:
if (ch == '?') {
*d++ = ch;
goto done;
}

if (ch == '%') {
state = sw_quoted;
@@ -811,11 +808,6 @@ rspamd_unescape_uri (gchar **dst, gchar **src, gsize size)
if (c >= 'a' && c <= 'f') {
ch = ((decoded << 4) + c - 'a' + 10);

if (ch == '?') {
*d++ = ch;
goto done;
}

*d++ = ch;
break;
}
@@ -825,10 +817,7 @@ rspamd_unescape_uri (gchar **dst, gchar **src, gsize size)
}
}

done:

*dst = d;
*src = s;
*d = '\0';
}

const gchar *
@@ -1050,11 +1039,12 @@ static gint
rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
gchar const **end, gboolean strict)
{
const gchar *p = str, *c = str, *last = str + len;
const gchar *p = str, *c = str, *last = str + len, *slash = NULL;
gchar t;
gunichar uc;
glong pt;
gint ret = 1;
gboolean user_seen = FALSE;
enum {
parse_protocol,
parse_slash,
@@ -1065,6 +1055,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
parse_password_start,
parse_password,
parse_domain,
parse_port_password,
parse_port,
parse_suffix_slash,
parse_path,
@@ -1103,15 +1094,8 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
break;
case parse_slash_slash:
c = p;

/* XXX: inefficient lookahead */
if (strchr (p, '@') != NULL) {
st = parse_user;
}
else {
st = parse_domain;
}

st = parse_domain;
slash = p;
break;
case parse_user:
if (t == ':') {
@@ -1164,13 +1148,16 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
if (p - c == 0) {
goto out;
}
SET_U (u, UF_HOST);

if (t == '/') {
SET_U (u, UF_HOST);
st = parse_suffix_slash;
}
else {
st = parse_port;
else if (!user_seen) {
/*
* Here we can have both port and password, hence we need
* to apply some heuristic here
*/
st = parse_port_password;
c = p + 1;
}
p ++;
@@ -1196,13 +1183,31 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
}
}
break;
case parse_port_password:
if (g_ascii_isdigit (t)) {
/* XXX: that breaks urls with passwords starting with number */
st = parse_port;
c = slash;
SET_U (u, UF_HOST);
c = p;
}
else {
/* Rewind back */
p = slash;
c = slash;
user_seen = TRUE;
st = parse_user;
}
break;
case parse_port:
if (t == '/') {
pt = strtoul (c, NULL, 10);
if (pt == 0 || pt > 65535) {
goto out;
}
u->port = pt;
if (u != NULL) {
u->port = pt;
}
st = parse_suffix_slash;
}
else if (!g_ascii_isdigit (t)) {
@@ -1432,14 +1437,8 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len,
}

/* Now decode url symbols */
uri->string = rspamd_mempool_strdup (pool, p);

if (uri->datalen > 0) {
rspamd_unescape_uri (&uri->data, &uri->data, uri->datalen);
}
if (uri->querylen > 0) {
rspamd_unescape_uri (&uri->query, &uri->query, uri->querylen);
}
uri->string = p;
rspamd_unescape_uri (uri->string, uri->string, len);
rspamd_str_lc (uri->string, uri->protocollen);
rspamd_str_lc (uri->host, uri->hostlen);


Loading…
Cancel
Save