[Rework] Make http normalize path function a generic function

author Vsevolod Stakhov <vsevolod@rspamd.com>

Sun, 23 Oct 2022 20:41:18 +0000 (21:41 +0100)

committer Vsevolod Stakhov <vsevolod@rspamd.com>

Sun, 23 Oct 2022 20:41:18 +0000 (21:41 +0100)
author Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 23 Oct 2022 20:41:18 +0000 (21:41 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 23 Oct 2022 20:41:18 +0000 (21:41 +0100)
diff --git a/src/controller.c b/src/controller.c

index e695d86a4477ad69160a26adabaeace46adba9a9..0ff7d64c07d117815763d3ac4eba7414008673d8 100644 (file)
--- a/src/controller.c
+++ b/src/controller.c
@@ -3287,9 +3287,9 @@ rspamd_controller_handle_custom (struct rspamd_http_connection_entry *conn_ent,
                 lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
                 lookup.len = u.field_data[UF_PATH].len;
  
-               rspamd_http_normalize_path_inplace ((gchar *)lookup.begin,
-                               lookup.len,
-                               &unnorm_len);
+               rspamd_normalize_path_inplace((gchar *) lookup.begin,
+                       lookup.len,
+                       &unnorm_len);
                 lookup.len = unnorm_len;
         }
         else {
@@ -3494,9 +3494,9 @@ rspamd_controller_handle_lua_plugin (struct rspamd_http_connection_entry *conn_e
                 lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
                 lookup.len = u.field_data[UF_PATH].len;
  
-               rspamd_http_normalize_path_inplace ((gchar *)lookup.begin,
-                               lookup.len,
-                               &unnorm_len);
+               rspamd_normalize_path_inplace((gchar *) lookup.begin,
+                       lookup.len,
+                       &unnorm_len);
                 lookup.len = unnorm_len;
         }
         else {
diff --git a/src/libserver/http/http_router.c b/src/libserver/http/http_router.c

index 5c4990ab627bc43274062094c5a2789e8434c7d6..a70ea223f3beb50e5b2cc7b5b279750bab39424f 100644 (file)
--- a/src/libserver/http/http_router.c
+++ b/src/libserver/http/http_router.c
@@ -302,9 +302,9 @@ rspamd_http_router_finish_handler (struct rspamd_http_connection *conn,
                                 lookup.begin = pathbuf;
                                 lookup.len = u.field_data[UF_PATH].len;
  
-                               rspamd_http_normalize_path_inplace (pathbuf,
-                                               lookup.len,
-                                               &unnorm_len);
+                               rspamd_normalize_path_inplace(pathbuf,
+                                       lookup.len,
+                                       &unnorm_len);
                                 lookup.len = unnorm_len;
                         }
                         else {
diff --git a/src/libserver/http/http_util.c b/src/libserver/http/http_util.c

index fd5adb3c1d2fce3822352e0cfd6a7741d0b24c83..c9035375ba27142bc11c3107ccd99dee6c8de7ab 100644 (file)
--- a/src/libserver/http/http_util.c
+++ b/src/libserver/http/http_util.c
@@ -299,228 +299,4 @@ rspamd_http_date_format (gchar *buf, gsize len, time_t time)
                         http_week[tms.tm_wday], tms.tm_mday,
                         http_month[tms.tm_mon], tms.tm_year + 1900,
                         tms.tm_hour, tms.tm_min, tms.tm_sec);
-}
-
-void
-rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen)
-{
-       const gchar *p, *end, *slash = NULL, *dot = NULL;
-       gchar *o;
-       enum {
-               st_normal = 0,
-               st_got_dot,
-               st_got_dot_dot,
-               st_got_slash,
-               st_got_slash_slash,
-       } state = st_normal;
-
-       p = path;
-       end = path + len;
-       o = path;
-
-       while (p < end) {
-               switch (state) {
-               case st_normal:
-                       if (G_UNLIKELY (*p == '/')) {
-                               state = st_got_slash;
-                               slash = p;
-                       }
-                       else if (G_UNLIKELY (*p == '.')) {
-                               state = st_got_dot;
-                               dot = p;
-                       }
-                       else {
-                               *o++ = *p;
-                       }
-                       p ++;
-                       break;
-               case st_got_slash:
-                       if (G_UNLIKELY (*p == '/')) {
-                               /* Ignore double slash */
-                               *o++ = *p;
-                               state = st_got_slash_slash;
-                       }
-                       else if (G_UNLIKELY (*p == '.')) {
-                               dot = p;
-                               state = st_got_dot;
-                       }
-                       else {
-                               *o++ = '/';
-                               *o++ = *p;
-                               slash = NULL;
-                               dot = NULL;
-                               state = st_normal;
-                       }
-                       p ++;
-                       break;
-               case st_got_slash_slash:
-                       if (G_LIKELY (*p != '/')) {
-                               slash = p - 1;
-                               dot = NULL;
-                               state = st_normal;
-                               continue;
-                       }
-                       p ++;
-                       break;
-               case st_got_dot:
-                       if (G_UNLIKELY (*p == '/')) {
-                               /* Remove any /./ or ./ paths */
-                               if (((o > path && *(o - 1) != '/') || (o == path)) && slash) {
-                                       /* Preserve one slash */
-                                       *o++ = '/';
-                               }
-
-                               slash = p;
-                               dot = NULL;
-                               /* Ignore last slash */
-                               state = st_normal;
-                       }
-                       else if (*p == '.') {
-                               /* Double dot character */
-                               state = st_got_dot_dot;
-                       }
-                       else {
-                               /* We have something like .some or /.some */
-                               if (dot && p > dot) {
-                                       if (slash == dot - 1 && (o > path && *(o - 1) != '/')) {
-                                               /* /.blah */
-                                               memmove (o, slash, p - slash);
-                                               o += p - slash;
-                                       }
-                                       else {
-                                               memmove (o, dot, p - dot);
-                                               o += p - dot;
-                                       }
-                               }
-
-                               slash = NULL;
-                               dot = NULL;
-                               state = st_normal;
-                               continue;
-                       }
-
-                       p ++;
-                       break;
-               case st_got_dot_dot:
-                       if (*p == '/') {
-                               /* We have something like /../ or ../ */
-                               if (slash) {
-                                       /* We need to remove the last component from o if it is there */
-                                       if (o > path + 2 && *(o - 1) == '/') {
-                                               slash = rspamd_memrchr (path, '/', o - path - 2);
-                                       }
-                                       else if (o > path + 1) {
-                                               slash = rspamd_memrchr (path, '/', o - path - 1);
-                                       }
-                                       else {
-                                               slash = NULL;
-                                       }
-
-                                       if (slash) {
-                                               o = (gchar *)slash;
-                                       }
-                                       /* Otherwise we keep these dots */
-                                       slash = p;
-                                       state = st_got_slash;
-                               }
-                               else {
-                                       /* We have something like bla../, so we need to copy it as is */
-                                       if (o > path && dot && p > dot) {
-                                               memmove (o, dot, p - dot);
-                                               o += p - dot;
-                                       }
-
-                                       slash = NULL;
-                                       dot = NULL;
-                                       state = st_normal;
-                                       continue;
-                               }
-                       }
-                       else {
-                               /* We have something like ..bla or ... */
-                               if (slash) {
-                                       *o ++ = '/';
-                               }
-
-                               if (dot && p > dot) {
-                                       memmove (o, dot, p - dot);
-                                       o += p - dot;
-                               }
-
-                               slash = NULL;
-                               dot = NULL;
-                               state = st_normal;
-                               continue;
-                       }
-
-                       p ++;
-                       break;
-               }
-       }
-
-       /* Leftover */
-       switch (state) {
-       case st_got_dot_dot:
-               /* Trailing .. */
-               if (slash) {
-                       /* We need to remove the last component from o if it is there */
-                       if (o > path + 2 && *(o - 1) == '/') {
-                               slash = rspamd_memrchr (path, '/', o - path - 2);
-                       }
-                       else if (o > path + 1) {
-                               slash = rspamd_memrchr (path, '/', o - path - 1);
-                       }
-                       else {
-                               if (o == path) {
-                                       /* Corner case */
-                                       *o++ = '/';
-                               }
-
-                               slash = NULL;
-                       }
-
-                       if (slash) {
-                               /* Remove last / */
-                               o = (gchar *)slash;
-                       }
-               }
-               else {
-                       /* Corner case */
-                       if (o == path) {
-                               *o++ = '/';
-                       }
-                       else {
-                               if (dot && p > dot) {
-                                       memmove (o, dot, p - dot);
-                                       o += p - dot;
-                               }
-                       }
-               }
-               break;
-       case st_got_dot:
-               if (slash) {
-                       /* /. -> must be / */
-                       *o++ = '/';
-               }
-               else {
-                       if (o > path) {
-                               *o++ = '.';
-                       }
-               }
-               break;
-       case st_got_slash:
-               *o++ = '/';
-               break;
-       default:
-#if 0
-               if (o > path + 1 && *(o - 1) == '/') {
-                       o --;
-               }
-#endif
-               break;
-       }
-
-       if (nlen) {
-               *nlen = (o - path);
-       }
  }
 \ No newline at end of file
diff --git a/src/libserver/http/http_util.h b/src/libserver/http/http_util.h

index 19b497f301746186c6bb8241d9a180d25b7b90de..3d8356c6de61b1bab79565a989fbe1cf1d14e60e 100644 (file)
--- a/src/libserver/http/http_util.h
+++ b/src/libserver/http/http_util.h
@@ -40,15 +40,6 @@ time_t rspamd_http_parse_date (const gchar *header, gsize len);
   */
  glong rspamd_http_date_format (gchar *buf, gsize len, time_t time);
  
-/**
- * Normalize HTTP path removing dot sequences and repeating '/' symbols as
- * per rfc3986#section-5.2
- * @param path
- * @param len
- * @param nlen
- */
-void rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen);
-
  #ifdef  __cplusplus
  }
  #endif
diff --git a/src/libserver/hyperscan_tools.cxx b/src/libserver/hyperscan_tools.cxx

index 6ec5f7c36b0147c4ed12b0f6d19fa54057147ec0..bb1c9ffbc24fc649fbd1e60ffec49e1f42d5e874 100644 (file)
--- a/src/libserver/hyperscan_tools.cxx
+++ b/src/libserver/hyperscan_tools.cxx
@@ -140,7 +140,7 @@ public:
  
                 auto mut_fname = std::string{fname};
                 std::size_t sz;
-               rspamd_http_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz);
+               rspamd_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz);
                 mut_fname.resize(sz);
                 auto dir = hs_known_files_cache::get_dir(mut_fname);
                 auto ext =  hs_known_files_cache::get_extension(mut_fname);
diff --git a/src/libserver/url.c b/src/libserver/url.c

index 805e3d65de574f5d67c358d6f57d5196c5badb4f..7be9d020aa3582ede2f3b360705e1d416529d000 100644 (file)
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -2439,8 +2439,8 @@ rspamd_url_parse (struct rspamd_url *uri,
  
                 rspamd_url_shift (uri, unquoted_len, UF_PATH);
                 /* We now normalize path */
-               rspamd_http_normalize_path_inplace (rspamd_url_data_unsafe (uri),
-                               uri->datalen, &unquoted_len);
+               rspamd_normalize_path_inplace(rspamd_url_data_unsafe (uri),
+                       uri->datalen, &unquoted_len);
                 rspamd_url_shift (uri, unquoted_len, UF_PATH);
         }
  
diff --git a/src/libutil/util.c b/src/libutil/util.c

index 547669536fe81d6583b083810495671509d0aaa8..bc62bb9193bbd162f4bad151c99d877c96fad418 100644 (file)
--- a/src/libutil/util.c
+++ b/src/libutil/util.c
@@ -2471,3 +2471,227 @@ rspamd_sum_floats (float *ar, gsize *nelts)
         *nelts = cnt;
         return sum;
  }
+
+void
+rspamd_normalize_path_inplace (gchar *path, guint len, gsize *nlen) /* Normalize the first `len` bytes of `path` inside the same buffer; the resulting length is stored to *nlen when nlen is non-NULL */
+{
+       const gchar *p, *end, *slash = NULL, *dot = NULL; /* p: read cursor; slash/dot: where a held-back '/' or '.' was seen (NULL when none is pending) */
+       gchar *o; /* write cursor into the same buffer */
+       enum {
+               st_normal = 0, /* copying ordinary characters */
+               st_got_dot, /* one '.' read but not yet written */
+               st_got_dot_dot, /* ".." read but not yet written */
+               st_got_slash, /* '/' read but not yet written */
+               st_got_slash_slash, /* inside a run of '/'; a single '/' has already been written */
+       } state = st_normal;
+
+       p = path;
+       end = path + len;
+       o = path;
+
+       while (p < end) {
+               switch (state) {
+               case st_normal:
+                       if (G_UNLIKELY (*p == '/')) {
+                               state = st_got_slash;
+                               slash = p;
+                       }
+                       else if (G_UNLIKELY (*p == '.')) {
+                               state = st_got_dot;
+                               dot = p;
+                       }
+                       else {
+                               *o++ = *p;
+                       }
+                       p ++;
+                       break;
+               case st_got_slash:
+                       if (G_UNLIKELY (*p == '/')) {
+                               /* Double slash: write a single '/' here, the rest of the run is skipped in st_got_slash_slash */
+                               *o++ = *p;
+                               state = st_got_slash_slash;
+                       }
+                       else if (G_UNLIKELY (*p == '.')) {
+                               dot = p;
+                               state = st_got_dot;
+                       }
+                       else {
+                               *o++ = '/';
+                               *o++ = *p;
+                               slash = NULL;
+                               dot = NULL;
+                               state = st_normal;
+                       }
+                       p ++;
+                       break;
+               case st_got_slash_slash:
+                       if (G_LIKELY (*p != '/')) {
+                               slash = p - 1;
+                               dot = NULL;
+                               state = st_normal;
+                               continue; /* p is not advanced: the same character is handled again in st_normal */
+                       }
+                       p ++;
+                       break;
+               case st_got_dot:
+                       if (G_UNLIKELY (*p == '/')) {
+                               /* Remove any /./ or ./ paths */
+                               if (((o > path && *(o - 1) != '/') || (o == path)) && slash) {
+                                       /* Preserve one slash */
+                                       *o++ = '/';
+                               }
+
+                               slash = p;
+                               dot = NULL;
+                               /* Ignore last slash */
+                               state = st_normal;
+                       }
+                       else if (*p == '.') {
+                               /* Double dot character */
+                               state = st_got_dot_dot;
+                       }
+                       else {
+                               /* We have something like .some or /.some */
+                               if (dot && p > dot) {
+                                       if (slash == dot - 1 && (o > path && *(o - 1) != '/')) {
+                                               /* /.blah */
+                                               memmove (o, slash, p - slash);
+                                               o += p - slash;
+                                       }
+                                       else {
+                                               memmove (o, dot, p - dot);
+                                               o += p - dot;
+                                       }
+                               }
+
+                               slash = NULL;
+                               dot = NULL;
+                               state = st_normal;
+                               continue; /* p is not advanced: the same character is handled again in st_normal */
+                       }
+
+                       p ++;
+                       break;
+               case st_got_dot_dot:
+                       if (*p == '/') {
+                               /* We have something like /../ or ../ */
+                               if (slash) {
+                                       /* We need to remove the last component from o if it is there */
+                                       if (o > path + 2 && *(o - 1) == '/') {
+                                               slash = rspamd_memrchr (path, '/', o - path - 2); /* NOTE(review): presumably a reverse search for '/' that skips the trailing one; rspamd_memrchr is defined elsewhere - confirm */
+                                       }
+                                       else if (o > path + 1) {
+                                               slash = rspamd_memrchr (path, '/', o - path - 1);
+                                       }
+                                       else {
+                                               slash = NULL;
+                                       }
+
+                                       if (slash) {
+                                               o = (gchar *)slash; /* rewind the write cursor to the '/' that was found */
+                                       }
+                                       /* Otherwise we keep these dots */
+                                       slash = p;
+                                       state = st_got_slash;
+                               }
+                               else {
+                                       /* We have something like bla../, so we need to copy it as is */
+                                       if (o > path && dot && p > dot) {
+                                               memmove (o, dot, p - dot);
+                                               o += p - dot;
+                                       }
+
+                                       slash = NULL;
+                                       dot = NULL;
+                                       state = st_normal;
+                                       continue; /* p is not advanced: the '/' is handled again in st_normal */
+                               }
+                       }
+                       else {
+                               /* We have something like ..bla or ... */
+                               if (slash) {
+                                       *o ++ = '/';
+                               }
+
+                               if (dot && p > dot) {
+                                       memmove (o, dot, p - dot);
+                                       o += p - dot;
+                               }
+
+                               slash = NULL;
+                               dot = NULL;
+                               state = st_normal;
+                               continue; /* p is not advanced: the same character is handled again in st_normal */
+                       }
+
+                       p ++;
+                       break;
+               }
+       }
+
+       /* Leftover: the input ended while a '/', '.' or ".." was still held back, so resolve it here */
+       switch (state) {
+       case st_got_dot_dot:
+               /* Trailing .. */
+               if (slash) {
+                       /* We need to remove the last component from o if it is there */
+                       if (o > path + 2 && *(o - 1) == '/') {
+                               slash = rspamd_memrchr (path, '/', o - path - 2);
+                       }
+                       else if (o > path + 1) {
+                               slash = rspamd_memrchr (path, '/', o - path - 1);
+                       }
+                       else {
+                               if (o == path) {
+                                       /* Corner case: nothing written yet, the output becomes a single '/' */
+                                       *o++ = '/';
+                               }
+
+                               slash = NULL;
+                       }
+
+                       if (slash) {
+                               /* Remove last / */
+                               o = (gchar *)slash;
+                       }
+               }
+               else {
+                       /* Corner case: nothing written yet, the output becomes a single '/' */
+                       if (o == path) {
+                               *o++ = '/';
+                       }
+                       else {
+                               if (dot && p > dot) {
+                                       memmove (o, dot, p - dot);
+                                       o += p - dot;
+                               }
+                       }
+               }
+               break;
+       case st_got_dot:
+               if (slash) {
+                       /* /. -> must be / */
+                       *o++ = '/';
+               }
+               else {
+                       if (o > path) {
+                               *o++ = '.';
+                       }
+               }
+               break;
+       case st_got_slash:
+               *o++ = '/'; /* write the trailing '/' that was held back */
+               break;
+       default:
+#if 0 /* compiled out: would drop one trailing '/' from the output */
+               if (o > path + 1 && *(o - 1) == '/') {
+                       o --;
+               }
+#endif
+               break;
+       }
+
+       if (nlen) { /* nlen is optional */
+               *nlen = (o - path); /* number of bytes written to path */
+       }
+}
diff --git a/src/libutil/util.h b/src/libutil/util.h

index f9be15d284f8c8b881c2cd682786346e8e77c091..f747bce5bb16ea02663dcbf4e065c7ff1791aa6a 100644 (file)
--- a/src/libutil/util.h
+++ b/src/libutil/util.h
@@ -526,6 +526,15 @@ extern const struct rspamd_controller_pbkdf pbkdf_list[];
   */
  float rspamd_sum_floats (float *ar, gsize *nelts);
  
+/**
+ * Normalize a path (URL or file path) in place, removing dot sequences and
+ * repeating '/' symbols as per rfc3986#section-5.2
+ * @param path buffer holding the path; it is overwritten with the result
+ * @param len number of bytes of `path` to process
+ * @param nlen if not NULL, receives the length of the normalized path
+ */
+void rspamd_normalize_path_inplace (gchar *path, guint len, gsize *nlen);
+
  #ifdef  __cplusplus
  }
  #endif
diff --git a/test/lua/unit/url.lua b/test/lua/unit/url.lua

index 2016cc6f4c73d12d4e5b2e62079a8ab0f544aad1..46eeef277ab4c30f294424108ec10d840b0ecc52 100644 (file)
--- a/test/lua/unit/url.lua
+++ b/test/lua/unit/url.lua
@@ -10,7 +10,7 @@ context("URL check functions", function()
    local ffi = require("ffi")
  
    ffi.cdef[[
-  void rspamd_http_normalize_path_inplace(char *path, size_t len, size_t *nlen);
+  void rspamd_normalize_path_inplace(char *path, size_t len, size_t *nlen);
    ]]
  
    test_helper.init_url_parser()
author	Vsevolod Stakhov <vsevolod@rspamd.com>
	Sun, 23 Oct 2022 20:41:18 +0000 (21:41 +0100)
committer	Vsevolod Stakhov <vsevolod@rspamd.com>
	Sun, 23 Oct 2022 20:41:18 +0000 (21:41 +0100)
src/controller.c		patch \| blob \| history
src/libserver/http/http_router.c		patch \| blob \| history
src/libserver/http/http_util.c		patch \| blob \| history
src/libserver/http/http_util.h		patch \| blob \| history
src/libserver/hyperscan_tools.cxx		patch \| blob \| history
src/libserver/url.c		patch \| blob \| history
src/libutil/util.c		patch \| blob \| history
src/libutil/util.h		patch \| blob \| history
test/lua/unit/url.lua		patch \| blob \| history