]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Add function to normalize HTTP paths
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 8 Jan 2017 16:47:03 +0000 (16:47 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 8 Jan 2017 18:59:56 +0000 (18:59 +0000)
src/libutil/http.c
src/libutil/http.h

index e4f2fa725fced6050e761b3de2278fab35f15846..73db0d17acebac892a98ccad6e76ae5d49dc9de5 100644 (file)
@@ -3430,3 +3430,122 @@ rspamd_http_message_unref (struct rspamd_http_message *msg)
 {
        REF_RELEASE (msg);
 }
+
+
+/*
+ * Normalize a path in place, one '/'-separated segment at a time:
+ *  - runs of '/' are collapsed into a single '/';
+ *  - "." segments are dropped;
+ *  - ".." segments remove the previously emitted segment, but never go
+ *    above the start of the path (or above the leading '/' of an absolute
+ *    path);
+ *  - any other segment (".foo", "..bar", "bar..", "...") is kept verbatim.
+ *
+ * A relative path is never turned into an absolute one: no '/' is emitted
+ * while the output is still empty.
+ *
+ * The result is written over the beginning of `path` and is never longer
+ * than `len`. No terminating NUL is written; use `*nlen` for the new length.
+ *
+ * @param path buffer to normalize, rewritten in place (may be NULL if len is 0)
+ * @param len number of bytes of `path` to process
+ * @param nlen if not NULL, receives the length of the normalized path
+ */
+void
+rspamd_http_normalize_path_inplace (gchar *path, gsize len, gsize *nlen)
+{
+       const gchar *p, *end, *seg;
+       gchar *o, *root;
+       gsize seg_len;
+
+       if (path == NULL || len == 0) {
+               if (nlen) {
+                       *nlen = 0;
+               }
+
+               return;
+       }
+
+       p = path;
+       end = path + len;
+       o = path;
+
+       if (*p == '/') {
+               /* Absolute path: the leading slash is already in place, keep it */
+               p ++;
+               o ++;
+       }
+
+       /* Output is never truncated below this point, so ".." cannot escape */
+       root = o;
+
+       while (p < end) {
+               if (*p == '/') {
+                       /*
+                        * Emit one separator per run of slashes, and none while the
+                        * output is still at the root (empty or just the leading '/')
+                        */
+                       if (o > root && *(o - 1) != '/') {
+                               *o++ = '/';
+                       }
+
+                       p ++;
+                       continue;
+               }
+
+               /* Find the end of the current segment */
+               seg = p;
+
+               while (p < end && *p != '/') {
+                       p ++;
+               }
+
+               seg_len = p - seg;
+
+               if (seg_len == 1 && seg[0] == '.') {
+                       /* "." refers to the current directory: just drop it */
+                       continue;
+               }
+
+               if (seg_len == 2 && seg[0] == '.' && seg[1] == '.') {
+                       if (o > root) {
+                               /*
+                                * At a segment start the output above the root always
+                                * ends with '/': drop that separator, then the segment
+                                * before it
+                                */
+                               o --;
+
+                               while (o > root && *(o - 1) != '/') {
+                                       o --;
+                               }
+                       }
+
+                       continue;
+               }
+
+               /* Ordinary segment; o <= seg, regions may overlap, hence memmove */
+               memmove (o, seg, seg_len);
+               o += seg_len;
+       }
+
+       if (nlen) {
+               *nlen = (o - path);
+       }
+}
index f02c01a046bc9c65ed34dd897ce01a61997c5ec6..0e828d3ff12134f613758eda01a3f4782fac000b 100644 (file)
@@ -501,4 +501,13 @@ GHashTable* rspamd_http_message_parse_query (struct rspamd_http_message *msg);
  */
 glong rspamd_http_date_format (gchar *buf, gsize len, time_t time);
 
+/**
+ * Normalize HTTP path removing dot sequences and repeating '/' symbols as
+ * per rfc3986#section-5.2
+ * The path is rewritten inside the supplied buffer; no terminating NUL is
+ * added, so callers should rely on `nlen` for the resulting length.
+ * @param path buffer holding the path; it is modified in place
+ * @param len number of bytes of `path` to process
+ * @param nlen if not NULL, receives the length of the normalized path
+ */
+void rspamd_http_normalize_path_inplace (gchar *path, gsize len, gsize *nlen);
+
 #endif /* HTTP_H_ */