diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-02-19 16:18:06 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-02-19 16:18:06 +0000 |
commit | 37b4f84e4e90bc73c66eea9d91c3aba831567bdc (patch) | |
tree | c8d088632628b40eb8ae8c2a85b935601bbef9ea | |
parent | fb944bf43ed19dbefde63d009fb226ae552979e9 (diff) | |
download | rspamd-37b4f84e4e90bc73c66eea9d91c3aba831567bdc.tar.gz rspamd-37b4f84e4e90bc73c66eea9d91c3aba831567bdc.zip |
Write parser for urls.
-rw-r--r-- | src/libserver/url.c | 281 |
1 files changed, 275 insertions, 6 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c index d3a255a07..88c86d6ce 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -928,6 +928,10 @@ rspamd_mailto_parse (struct http_parser_url *u, const gchar *str) } if (st == parse_domain) { + if (p - c == 0) { + return 1; + } + u->field_set |= 1 << UF_HOST; u->field_data[UF_HOST].len = p - c; u->field_data[UF_HOST].off = c - str; @@ -935,9 +939,11 @@ rspamd_mailto_parse (struct http_parser_url *u, const gchar *str) return 0; } else if (st == parse_query) { - u->field_set |= 1 << UF_QUERY; - u->field_data[UF_QUERY].len = p - c; - u->field_data[UF_QUERY].off = c - str; + if (p - c > 0) { + u->field_set |= 1 << UF_QUERY; + u->field_data[UF_QUERY].len = p - c; + u->field_data[UF_QUERY].off = c - str; + } return 0; } @@ -945,6 +951,270 @@ rspamd_mailto_parse (struct http_parser_url *u, const gchar *str) return 1; } +static gint +rspamd_web_parse (struct http_parser_url *u, const gchar *str) +{ + const gchar *p = str, *c = str; + gchar t; + gunichar uc; + glong pt; + gint ret = 1; + enum { + parse_protocol, + parse_slash, + parse_slash_slash, + parse_semicolon, + parse_user, + parse_at, + parse_password_start, + parse_password, + parse_domain, + parse_port, + parse_suffix_slash, + parse_path, + parse_query, + parse_part + } st = parse_protocol; + + while (*p) { + t = *p; + + switch (st) { + case parse_protocol: + if (t == ':') { + st = parse_semicolon; + u->field_set |= 1 << UF_SCHEMA; + u->field_data[UF_SCHEMA].len = p - c; + u->field_data[UF_SCHEMA].off = 0; + } + p ++; + break; + case parse_semicolon: + if (t == '/') { + st = parse_slash; + p ++; + } + else { + st = parse_slash_slash; + } + break; + case parse_slash: + if (t == '/') { + st = parse_slash_slash; + } + else { + return 1; + } + p ++; + break; + case parse_slash_slash: + c = p; + + /* XXX: inefficient lookahead */ + if (strchr (p, '@') != NULL) { + st = parse_user; + } + else { + st = parse_domain; + } + + break; + case parse_user: + if (t == ':') { + if (p - c == 0) { + return 1; + } + u->field_set |= 1 << UF_USERINFO; + u->field_data[UF_USERINFO].len = p - c; + u->field_data[UF_USERINFO].off = c - str; + st = parse_password_start; + } + else if (t == '@') { + /* No password */ + if (p - c == 0) { + return 1; + } + u->field_set |= 1 << UF_USERINFO; + u->field_data[UF_USERINFO].len = p - c; + u->field_data[UF_USERINFO].off = c - str; + st = parse_at; + } + else if (!is_atom (t)) { + return 1; + } + p ++; + break; + case parse_password_start: + if (t == '@') { + /* Empty password */ + st = parse_at; + } + else { + c = p; + st = parse_password; + } + p ++; + break; + case parse_password: + if (t == '@') { + /* XXX: password is not stored */ + st = parse_at; + } + else if (!is_atom (t)) { + return 1; + } + p ++; + break; + case parse_at: + c = p; + st = parse_domain; + break; + case parse_domain: + if (t == '/' || t == ':') { + if (p - c == 0) { + return 1; + } + u->field_set |= 1 << UF_HOST; + u->field_data[UF_HOST].len = p - c; + u->field_data[UF_HOST].off = c - str; + + if (t == '/') { + st = parse_suffix_slash; + } + else { + st = parse_port; + } + p ++; + } + else { + if (*p != '.' && *p != '-' && *p != '_') { + uc = g_utf8_get_char_validated (p, -1); + + if (uc == (gunichar)-1) { + /* Bad utf8 */ + return 1; + } + + if (!g_unichar_isalnum (uc)) { + /* Bad symbol */ + return 1; + } + + p = g_utf8_next_char (p); + } + else { + p ++; + } + } + break; + case parse_port: + if (t == '/') { + pt = strtoul (c, NULL, 10); + if (pt == 0 || pt > 65535) { + return 1; + } + u->port = pt; + st = parse_suffix_slash; + } + else if (!g_ascii_isdigit (t)) { + return 1; + } + p ++; + break; + case parse_suffix_slash: + if (t != '/') { + c = p; + st = parse_path; + } + else { + /* Skip extra slashes */ + p ++; + } + break; + case parse_path: + if (t == '?') { + if (p - c != 0) { + u->field_set |= 1 << UF_PATH; + u->field_data[UF_PATH].len = p - c; + u->field_data[UF_PATH].off = c - str; + } + c = p + 1; + st = parse_query; + } + p ++; + break; + case parse_query: + if (t == '#') { + if (p - c != 0) { + u->field_set |= 1 << UF_QUERY; + u->field_data[UF_QUERY].len = p - c; + u->field_data[UF_QUERY].off = c - str; + } + c = p + 1; + st = parse_part; + } + p ++; + break; + case parse_part: + /* Allow anything here */ + p ++; + break; + } + } + + /* Parse remaining */ + switch (st) { + case parse_domain: + if (p - c == 0) { + return 1; + } + u->field_set |= 1 << UF_HOST; + u->field_data[UF_HOST].len = p - c; + u->field_data[UF_HOST].off = c - str; + ret = 0; + + break; + case parse_port: + pt = strtoul (c, NULL, 10); + if (pt == 0 || pt > 65535) { + return 1; + } + u->port = pt; + ret = 0; + break; + case parse_path: + if (p - c > 0) { + u->field_set |= 1 << UF_PATH; + u->field_data[UF_PATH].len = p - c; + u->field_data[UF_PATH].off = c - str; + } + ret = 0; + break; + case parse_query: + if (p - c > 0) { + u->field_set |= 1 << UF_QUERY; + u->field_data[UF_QUERY].len = p - c; + u->field_data[UF_QUERY].off = c - str; + } + ret = 0; + break; + case parse_part: + if (p - c > 0) { + u->field_set |= 1 << UF_FRAGMENT; + u->field_data[UF_FRAGMENT].len = p - c; + u->field_data[UF_FRAGMENT].off = c - str; + } + ret = 0; + break; + default: + /* Error state */ + ret = 1; + break; + } + + return ret; +} + enum uri_errno rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len, rspamd_mempool_t *pool) @@ -1003,7 +1273,6 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len, } uri->string = p; - len = strlen (p); rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t)g_free, p); @@ -1013,11 +1282,11 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len, ret = rspamd_mailto_parse (&u, p); } else { - ret = http_parser_parse_url (p, len, 0, &u); + ret = rspamd_web_parse (&u, p); } } else { - ret = http_parser_parse_url (p, len, 0, &u); + ret = rspamd_web_parse (&u, p); } if (ret != 0) { |