source.dussan.org Git - rspamd.git/commitdiff
[Test] Add tests, fix normalization algorithm
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 8 Jan 2017 18:24:53 +0000 (18:24 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 8 Jan 2017 18:59:56 +0000 (18:59 +0000)
src/libutil/http.c
test/lua/unit/url.lua
test/rspamd_test_suite.c

index 73db0d17acebac892a98ccad6e76ae5d49dc9de5..fdc9d1294a07da5a23119c13cb731fdbd5540da8 100644 (file)
@@ -3435,7 +3435,7 @@ rspamd_http_message_unref (struct rspamd_http_message *msg)
 void
 rspamd_http_normalize_path_inplace (gchar *path, gsize len, gsize *nlen)
 {
-       const gchar *p, *end, *c, *slash;
+       const gchar *p, *end, *slash = NULL, *dot = NULL;
        gchar *o;
        enum {
                st_normal = 0,
@@ -3446,7 +3446,6 @@ rspamd_http_normalize_path_inplace (gchar *path, gsize len, gsize *nlen)
        } state = st_normal;
 
        p = path;
-       c = path;
        end = path + len;
        o = path;
 
@@ -3455,11 +3454,11 @@ rspamd_http_normalize_path_inplace (gchar *path, gsize len, gsize *nlen)
                case st_normal:
                        if (G_UNLIKELY (*p == '/')) {
                                state = st_got_slash;
-                               c = p;
+                               slash = p;
                        }
                        else if (G_UNLIKELY (*p == '.')) {
                                state = st_got_dot;
-                               c = p;
+                               dot = p;
                        }
                        else {
                                *o++ = *p;
@@ -3473,24 +3472,38 @@ rspamd_http_normalize_path_inplace (gchar *path, gsize len, gsize *nlen)
                                state = st_got_slash_slash;
                        }
                        else if (G_UNLIKELY (*p == '.')) {
+                               dot = p;
                                state = st_got_dot;
                        }
                        else {
+                               *o++ = '/';
                                *o++ = *p;
+                               slash = NULL;
+                               dot = NULL;
                                state = st_normal;
                        }
                        p ++;
                        break;
                case st_got_slash_slash:
                        if (G_LIKELY (*p != '/')) {
-                               *o++ = *p;
+                               slash = p - 1;
+                               dot = NULL;
                                state = st_normal;
+                               continue;
                        }
                        p ++;
                        break;
                case st_got_dot:
                        if (G_UNLIKELY (*p == '/')) {
                                /* Remove any /./ or ./ paths */
+                               if (((o > path && *(o - 1) != '/') || (o == path)) && slash) {
+                                       /* Preserve one slash */
+                                       *o++ = '/';
+                               }
+
+                               slash = p;
+                               dot = NULL;
+                               /* Ignore last slash */
                                state = st_normal;
                        }
                        else if (*p == '.') {
@@ -3499,52 +3512,117 @@ rspamd_http_normalize_path_inplace (gchar *path, gsize len, gsize *nlen)
                        }
                        else {
                                /* We have something like .some or /.some */
-                               if (p > c) {
-                                       memcpy (o, c, p - c);
-                                       o += p - c;
+                               if (dot && p > dot) {
+                                       memmove (o, dot, p - dot);
+                                       o += p - dot;
                                }
 
+                               slash = NULL;
+                               dot = NULL;
                                state = st_normal;
+                               continue;
                        }
+
                        p ++;
                        break;
                case st_got_dot_dot:
                        if (*p == '/') {
                                /* We have something like /../ or ../ */
-                               if (*c == '/') {
+                               if (slash) {
                                        /* We need to remove the last component from o if it is there */
-                                       slash = rspamd_memrchr (path, '/', o - path);
+                                       if (o > path + 2 && *(o - 1) == '/') {
+                                               slash = rspamd_memrchr (path, '/', o - path - 2);
+                                       }
+                                       else if (o > path + 1) {
+                                               slash = rspamd_memrchr (path, '/', o - path - 1);
+                                       }
+                                       else {
+                                               slash = NULL;
+                                       }
 
                                        if (slash) {
                                                o = (gchar *)slash;
                                        }
-                                       /* Otherwise we remove these dots */
-                                       state = st_normal;
+                                       /* Otherwise we keep these dots */
+                                       slash = p;
+                                       state = st_got_slash;
                                }
                                else {
                                        /* We have something like bla../, so we need to copy it as is */
-                                       if (p > c) {
-                                               memcpy (o, c, p - c);
-                                               o += p - c;
+
+                                       if (slash) {
+                                               *o ++ = '/';
+                                       }
+                                       if (dot && p > dot) {
+                                               memcpy (o, dot, p - dot);
+                                               o += p - dot;
                                        }
 
+                                       slash = NULL;
+                                       dot = NULL;
                                        state = st_normal;
+                                       continue;
                                }
                        }
                        else {
                                /* We have something like ..bla or ... */
-                               if (p > c) {
-                                       memcpy (o, c, p - c);
-                                       o += p - c;
+                               if (slash) {
+                                       *o ++ = '/';
                                }
 
+                               if (dot && p > dot) {
+                                       memmove (o, dot, p - dot);
+                                       o += p - dot;
+                               }
+
+                               slash = NULL;
+                               dot = NULL;
                                state = st_normal;
+                               continue;
                        }
+
                        p ++;
                        break;
                }
        }
 
+       /* Leftover */
+       switch (state) {
+       case st_got_dot_dot:
+               /* Trailing .. */
+               if (slash) {
+                       /* We need to remove the last component from o if it is there */
+                       if (o > path + 2 && *(o - 1) == '/') {
+                               slash = rspamd_memrchr (path, '/', o - path - 2);
+                       }
+                       else if (o > path + 1) {
+                               slash = rspamd_memrchr (path, '/', o - path - 1);
+                       }
+                       else {
+                               if (o == path) {
+                                       /* Corner case */
+                                       *o++ = '/';
+                               }
+
+                               slash = NULL;
+                       }
+
+                       if (slash) {
+                               /* Remove last / */
+                               o = (gchar *)slash;
+                       }
+               }
+               break;
+       case st_got_slash:
+               *o++ = '/';
+               break;
+       default:
+               if (o > path + 1 && *(o - 1) == '/') {
+                       o --;
+               }
+               break;
+       }
+
        if (nlen) {
                *nlen = (o - path);
        }
index de274425d6e08fa96600b5d04a1c7d54e9b4f454..991c6d0d07126e0fd25cc8ac2e388f017b2634b1 100644 (file)
@@ -8,6 +8,7 @@ context("URL check functions", function()
   ffi.cdef[[
   void rspamd_url_init (const char *tld_file);
   unsigned ottery_rand_range(unsigned top);
+  void rspamd_http_normalize_path_inplace(char *path, size_t len, size_t *nlen);
   ]]
 
   local test_dir = string.gsub(debug.getinfo(1).source, "^@(.+/)[^/]+$", "%1")
@@ -119,4 +120,31 @@ context("URL check functions", function()
     end
   end
   )
+  test("Normalize paths", function()
+    local cases = {
+      {"/././foo", "/foo"},
+      {"/a/b/c/./../../g", "/a/g"},
+      {"/./.foo", "/.foo"},
+      {"/foo/.", "/foo"},
+      {"/foo/./", "/foo"},
+      {"/foo/bar/..", "/foo"},
+      {"/foo/bar/../", "/foo/"},
+      {"/foo/..bar", "/foo/..bar"},
+      {"/foo/bar/../ton", "/foo/ton"},
+      {"/foo/bar/../ton/../../a", "/a"},
+      {"/foo/../../..", "/"},
+      {"/foo/../../../ton", "/ton"},
+      {"////../..", "/"},
+    }
+
+    for _,v in ipairs(cases) do
+      print(v[1])
+      local buf = ffi.new("uint8_t[?]", #v[1])
+      local sizbuf = ffi.new("size_t[1]")
+      ffi.copy(buf, v[1], #v[1])
+      ffi.C.rspamd_http_normalize_path_inplace(buf, #v[1], sizbuf)
+      local res = ffi.string(buf, tonumber(sizbuf[0]))
+      assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in path ' .. v[1])
+    end
+  end)
 end)
index ddce88d3d673617caba9af6d4e7372ae6aac2dac..f3586e4f5bc916e2f46ddbb98cd8805d53afc852 100644 (file)
@@ -59,8 +59,6 @@ main (int argc, char **argv)
        g_test_add_func ("/rspamd/aio", rspamd_async_test_func);
 #endif
        g_test_run ();
-
-       g_mime_shutdown ();
        rspamd_regexp_library_finalize ();
 
        return 0;