From 5dcc6259674c783fc76ffde7789d3c76b9d49fee Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sun, 8 Jan 2017 16:47:03 +0000 Subject: [PATCH] [Feature] Add function to normalize HTTP paths --- src/libutil/http.c | 119 +++++++++++++++++++++++++++++++++++++++++++++ src/libutil/http.h | 9 ++++ 2 files changed, 128 insertions(+) diff --git a/src/libutil/http.c b/src/libutil/http.c index e4f2fa725..73db0d17a 100644 --- a/src/libutil/http.c +++ b/src/libutil/http.c @@ -3430,3 +3430,122 @@ rspamd_http_message_unref (struct rspamd_http_message *msg) { REF_RELEASE (msg); } + + +void +rspamd_http_normalize_path_inplace (gchar *path, gsize len, gsize *nlen) +{ + const gchar *p, *end, *c, *slash; + gchar *o; + enum { + st_normal = 0, + st_got_dot, + st_got_dot_dot, + st_got_slash, + st_got_slash_slash, + } state = st_normal; + + p = path; + c = path; + end = path + len; + o = path; + + while (p < end) { + switch (state) { + case st_normal: + if (G_UNLIKELY (*p == '/')) { + state = st_got_slash; + c = p; + } + else if (G_UNLIKELY (*p == '.')) { + state = st_got_dot; + c = p; + } + else { + *o++ = *p; + } + p ++; + break; + case st_got_slash: + if (G_UNLIKELY (*p == '/')) { + /* Ignore double slash */ + *o++ = *p; + state = st_got_slash_slash; + } + else if (G_UNLIKELY (*p == '.')) { + state = st_got_dot; + } + else { + *o++ = *p; + state = st_normal; + } + p ++; + break; + case st_got_slash_slash: + if (G_LIKELY (*p != '/')) { + *o++ = *p; + state = st_normal; + } + p ++; + break; + case st_got_dot: + if (G_UNLIKELY (*p == '/')) { + /* Remove any /./ or ./ paths */ + state = st_normal; + } + else if (*p == '.') { + /* Double dot character */ + state = st_got_dot_dot; + } + else { + /* We have something like .some or /.some */ + if (p > c) { + memcpy (o, c, p - c); + o += p - c; + } + + state = st_normal; + } + p ++; + break; + case st_got_dot_dot: + if (*p == '/') { + /* We have something like /../ or ../ */ + if (*c == '/') { + /* We need to remove the last component from o if it is there */ + slash = rspamd_memrchr (path, '/', o - path); + + if (slash) { + o = (gchar *)slash; + } + /* Otherwise we remove these dots */ + state = st_normal; + } + else { + /* We have something like bla../, so we need to copy it as is */ + if (p > c) { + memcpy (o, c, p - c); + o += p - c; + } + + state = st_normal; + } + } + else { + /* We have something like ..bla or ... */ + if (p > c) { + memcpy (o, c, p - c); + o += p - c; + } + + state = st_normal; + } + p ++; + break; + } + } + + if (nlen) { + *nlen = (o - path); + } +} diff --git a/src/libutil/http.h b/src/libutil/http.h index f02c01a04..0e828d3ff 100644 --- a/src/libutil/http.h +++ b/src/libutil/http.h @@ -501,4 +501,13 @@ GHashTable* rspamd_http_message_parse_query (struct rspamd_http_message *msg); */ glong rspamd_http_date_format (gchar *buf, gsize len, time_t time); +/** + * Normalize HTTP path removing dot sequences and repeating '/' symbols as + * per rfc3986#section-5.2 + * @param path + * @param len + * @param nlen + */ +void rspamd_http_normalize_path_inplace (gchar *path, gsize len, gsize *nlen); + #endif /* HTTP_H_ */ -- 2.39.5