about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-01-08 16:47:03 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-01-08 18:59:56 +0000
commit: 5dcc6259674c783fc76ffde7789d3c76b9d49fee (patch)
tree: 4c9dc2e245b1728faefa2fc884388ce941ac000c /src
parent: 95f9bd8b32b17261e168d53a50dd1c9e4d2a752b (diff)
download: rspamd-5dcc6259674c783fc76ffde7789d3c76b9d49fee.tar.gz
download: rspamd-5dcc6259674c783fc76ffde7789d3c76b9d49fee.zip
[Feature] Add function to normalize HTTP paths
Diffstat (limited to 'src')
-rw-r--r-- src/libutil/http.c 119
-rw-r--r-- src/libutil/http.h 9
2 files changed, 128 insertions, 0 deletions
diff --git a/src/libutil/http.c b/src/libutil/http.c
index e4f2fa725..73db0d17a 100644
--- a/src/libutil/http.c
+++ b/src/libutil/http.c
@@ -3430,3 +3430,122 @@ rspamd_http_message_unref (struct rspamd_http_message *msg)
{
REF_RELEASE (msg);
}
+
+
+/*
+ * Normalizes an HTTP path in place.
+ *
+ * The buffer is split into segments separated by '/' and rewritten as follows:
+ *  - runs of '/' are collapsed into a single separator;
+ *  - "." segments are dropped;
+ *  - ".." segments drop the previously emitted segment (if any); the leading
+ *    '/' of an absolute path is never removed, and a relative path never
+ *    becomes absolute;
+ *  - any other segment (including ".foo", "..bar", "bla..", "...") is kept;
+ *  - a trailing '/' is emitted when the input ends with '/', "." or ".." and
+ *    at least one segment is left in the output.
+ *
+ * The whole buffer is treated as a path: a query string, if present, is not
+ * recognised and is processed like any other path data.
+ *
+ * The output is never longer than the input and is NOT NUL-terminated; callers
+ * must use the length stored in `nlen`.
+ *
+ * path: buffer to normalize, rewritten in place (may be NULL if len is 0)
+ * len:  number of bytes in path
+ * nlen: if not NULL, receives the length of the normalized path
+ */
+void
+rspamd_http_normalize_path_inplace (gchar *path, gsize len, gsize *nlen)
+{
+	const gchar *p, *end, *seg;
+	gchar *o, *base;
+	gsize seglen;
+	int trailing_slash = 0;
+
+	if (path == NULL || len == 0) {
+		if (nlen) {
+			*nlen = 0;
+		}
+
+		return;
+	}
+
+	p = path;
+	end = path + len;
+	o = path;
+
+	if (*p == '/') {
+		/* Absolute path: keep the root slash, ".." can never remove it */
+		*o++ = '/';
+	}
+
+	/* Everything before base is fixed; segments are pushed/popped after it */
+	base = o;
+
+	while (p < end) {
+		if (*p == '/') {
+			/* Collapse any run of slashes into a single separator */
+			while (p < end && *p == '/') {
+				p ++;
+			}
+
+			/* Remembered in case the input ends right here */
+			trailing_slash = 1;
+			continue;
+		}
+
+		seg = p;
+
+		while (p < end && *p != '/') {
+			p ++;
+		}
+
+		seglen = (gsize)(p - seg);
+
+		if (seglen == 1 && seg[0] == '.') {
+			/* "." refers to the current directory: drop it */
+			trailing_slash = 1;
+		}
+		else if (seglen == 2 && seg[0] == '.' && seg[1] == '.') {
+			/* ".." removes the last emitted segment, if there is one */
+			while (o > base && *(o - 1) != '/') {
+				o --;
+			}
+
+			if (o > base) {
+				/* Remove the separator that preceded that segment as well */
+				o --;
+			}
+
+			trailing_slash = 1;
+		}
+		else {
+			/* Ordinary segment; the separator is written lazily */
+			if (o > base) {
+				*o++ = '/';
+			}
+
+			/* Source and destination share a buffer and may overlap */
+			memmove (o, seg, seglen);
+			o += seglen;
+			trailing_slash = 0;
+		}
+	}
+
+	if (trailing_slash && o > base) {
+		*o++ = '/';
+	}
+
+	if (nlen) {
+		*nlen = (gsize)(o - path);
+	}
+}
diff --git a/src/libutil/http.h b/src/libutil/http.h
index f02c01a04..0e828d3ff 100644
--- a/src/libutil/http.h
+++ b/src/libutil/http.h
@@ -501,4 +501,13 @@ GHashTable* rspamd_http_message_parse_query (struct rspamd_http_message *msg);
*/
glong rspamd_http_date_format (gchar *buf, gsize len, time_t time);
+/**
+ * Normalize HTTP path removing dot sequences and repeating '/' symbols as
+ * per rfc3986#section-5.2.4; the buffer is rewritten in place and no
+ * terminating NUL byte is written by this function
+ * @param path buffer holding the path to normalize, modified in place
+ * @param len length of `path` in bytes
+ * @param nlen if not NULL, receives the length of the normalized path
+ */
+void rspamd_http_normalize_path_inplace (gchar *path, gsize len, gsize *nlen);
+
#endif /* HTTP_H_ */