summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-05-12 14:39:09 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-05-12 14:39:09 +0100
commit: db4ef545172db3417684695df1ecca3de001c93f (patch)
tree: 8eec404329944875c592555fdf826bfea69390f2 /src
parent: 0adcee676505367509681d03ed4e7d611f6de6a0 (diff)
download: rspamd-db4ef545172db3417684695df1ecca3de001c93f.tar.gz
          rspamd-db4ef545172db3417684695df1ecca3de001c93f.zip
[Minor] Fix parsing of some bogus urls
Diffstat (limited to 'src')
-rw-r--r-- src/libserver/url.c | 29
1 file changed, 27 insertions, 2 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c
index eb663519d..8a33b4915 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1113,10 +1113,35 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
if (t != '/' && t != '\\') {
c = p;
- st = parse_domain_start;
slash = p;
+ st = parse_domain_start;
+
+ /*
+ * Unfortunately, due to brain damage of the RFC 3986 authors,
+ * we have to distinguish two possibilities here, because
+ * authority = [ userinfo "@" ] host [ ":" port ]
+ * If there is an '@' somewhere before the hostname, then we must
+ * continue in the username state. Otherwise, we have to continue in
+ * the hostname state. Unfortunately, there is no way to tell them
+ * apart other than running an NFA, running two DFAs, or performing
+ * lookahead. The lookahead approach looks the easiest to implement.
+ */
+
+ const char *tp = p;
+ while (tp < last) {
+ if (*tp == '@') {
+ user_seen = TRUE;
+ st = parse_user;
+ break;
+ }
+ else if (*tp == '/' || *tp == '#' || *tp == '?') {
+ st = parse_domain_start;
+ }
+
+ tp ++;
+ }
- if (*p == '[') {
+ if (st == parse_domain_start && *p == '[') {
st = parse_ipv6;
p++;
c = p;