source.dussan.org Git - rspamd.git/commitdiff
[Rework] Make http normalize path function a generic function
author Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 23 Oct 2022 20:41:18 +0000 (21:41 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 23 Oct 2022 20:41:18 +0000 (21:41 +0100)
src/controller.c
src/libserver/http/http_router.c
src/libserver/http/http_util.c
src/libserver/http/http_util.h
src/libserver/hyperscan_tools.cxx
src/libserver/url.c
src/libutil/util.c
src/libutil/util.h
test/lua/unit/url.lua

index e695d86a4477ad69160a26adabaeace46adba9a9..0ff7d64c07d117815763d3ac4eba7414008673d8 100644 (file)
@@ -3287,9 +3287,9 @@ rspamd_controller_handle_custom (struct rspamd_http_connection_entry *conn_ent,
                lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
                lookup.len = u.field_data[UF_PATH].len;
 
-               rspamd_http_normalize_path_inplace ((gchar *)lookup.begin,
-                               lookup.len,
-                               &unnorm_len);
+               rspamd_normalize_path_inplace((gchar *) lookup.begin,
+                       lookup.len,
+                       &unnorm_len);
                lookup.len = unnorm_len;
        }
        else {
@@ -3494,9 +3494,9 @@ rspamd_controller_handle_lua_plugin (struct rspamd_http_connection_entry *conn_e
                lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
                lookup.len = u.field_data[UF_PATH].len;
 
-               rspamd_http_normalize_path_inplace ((gchar *)lookup.begin,
-                               lookup.len,
-                               &unnorm_len);
+               rspamd_normalize_path_inplace((gchar *) lookup.begin,
+                       lookup.len,
+                       &unnorm_len);
                lookup.len = unnorm_len;
        }
        else {
index 5c4990ab627bc43274062094c5a2789e8434c7d6..a70ea223f3beb50e5b2cc7b5b279750bab39424f 100644 (file)
@@ -302,9 +302,9 @@ rspamd_http_router_finish_handler (struct rspamd_http_connection *conn,
                                lookup.begin = pathbuf;
                                lookup.len = u.field_data[UF_PATH].len;
 
-                               rspamd_http_normalize_path_inplace (pathbuf,
-                                               lookup.len,
-                                               &unnorm_len);
+                               rspamd_normalize_path_inplace(pathbuf,
+                                       lookup.len,
+                                       &unnorm_len);
                                lookup.len = unnorm_len;
                        }
                        else {
index fd5adb3c1d2fce3822352e0cfd6a7741d0b24c83..c9035375ba27142bc11c3107ccd99dee6c8de7ab 100644 (file)
@@ -299,228 +299,4 @@ rspamd_http_date_format (gchar *buf, gsize len, time_t time)
                        http_week[tms.tm_wday], tms.tm_mday,
                        http_month[tms.tm_mon], tms.tm_year + 1900,
                        tms.tm_hour, tms.tm_min, tms.tm_sec);
-}
-
-void
-rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen)
-{
-       const gchar *p, *end, *slash = NULL, *dot = NULL;
-       gchar *o;
-       enum {
-               st_normal = 0,
-               st_got_dot,
-               st_got_dot_dot,
-               st_got_slash,
-               st_got_slash_slash,
-       } state = st_normal;
-
-       p = path;
-       end = path + len;
-       o = path;
-
-       while (p < end) {
-               switch (state) {
-               case st_normal:
-                       if (G_UNLIKELY (*p == '/')) {
-                               state = st_got_slash;
-                               slash = p;
-                       }
-                       else if (G_UNLIKELY (*p == '.')) {
-                               state = st_got_dot;
-                               dot = p;
-                       }
-                       else {
-                               *o++ = *p;
-                       }
-                       p ++;
-                       break;
-               case st_got_slash:
-                       if (G_UNLIKELY (*p == '/')) {
-                               /* Ignore double slash */
-                               *o++ = *p;
-                               state = st_got_slash_slash;
-                       }
-                       else if (G_UNLIKELY (*p == '.')) {
-                               dot = p;
-                               state = st_got_dot;
-                       }
-                       else {
-                               *o++ = '/';
-                               *o++ = *p;
-                               slash = NULL;
-                               dot = NULL;
-                               state = st_normal;
-                       }
-                       p ++;
-                       break;
-               case st_got_slash_slash:
-                       if (G_LIKELY (*p != '/')) {
-                               slash = p - 1;
-                               dot = NULL;
-                               state = st_normal;
-                               continue;
-                       }
-                       p ++;
-                       break;
-               case st_got_dot:
-                       if (G_UNLIKELY (*p == '/')) {
-                               /* Remove any /./ or ./ paths */
-                               if (((o > path && *(o - 1) != '/') || (o == path)) && slash) {
-                                       /* Preserve one slash */
-                                       *o++ = '/';
-                               }
-
-                               slash = p;
-                               dot = NULL;
-                               /* Ignore last slash */
-                               state = st_normal;
-                       }
-                       else if (*p == '.') {
-                               /* Double dot character */
-                               state = st_got_dot_dot;
-                       }
-                       else {
-                               /* We have something like .some or /.some */
-                               if (dot && p > dot) {
-                                       if (slash == dot - 1 && (o > path && *(o - 1) != '/')) {
-                                               /* /.blah */
-                                               memmove (o, slash, p - slash);
-                                               o += p - slash;
-                                       }
-                                       else {
-                                               memmove (o, dot, p - dot);
-                                               o += p - dot;
-                                       }
-                               }
-
-                               slash = NULL;
-                               dot = NULL;
-                               state = st_normal;
-                               continue;
-                       }
-
-                       p ++;
-                       break;
-               case st_got_dot_dot:
-                       if (*p == '/') {
-                               /* We have something like /../ or ../ */
-                               if (slash) {
-                                       /* We need to remove the last component from o if it is there */
-                                       if (o > path + 2 && *(o - 1) == '/') {
-                                               slash = rspamd_memrchr (path, '/', o - path - 2);
-                                       }
-                                       else if (o > path + 1) {
-                                               slash = rspamd_memrchr (path, '/', o - path - 1);
-                                       }
-                                       else {
-                                               slash = NULL;
-                                       }
-
-                                       if (slash) {
-                                               o = (gchar *)slash;
-                                       }
-                                       /* Otherwise we keep these dots */
-                                       slash = p;
-                                       state = st_got_slash;
-                               }
-                               else {
-                                       /* We have something like bla../, so we need to copy it as is */
-                                       if (o > path && dot && p > dot) {
-                                               memmove (o, dot, p - dot);
-                                               o += p - dot;
-                                       }
-
-                                       slash = NULL;
-                                       dot = NULL;
-                                       state = st_normal;
-                                       continue;
-                               }
-                       }
-                       else {
-                               /* We have something like ..bla or ... */
-                               if (slash) {
-                                       *o ++ = '/';
-                               }
-
-                               if (dot && p > dot) {
-                                       memmove (o, dot, p - dot);
-                                       o += p - dot;
-                               }
-
-                               slash = NULL;
-                               dot = NULL;
-                               state = st_normal;
-                               continue;
-                       }
-
-                       p ++;
-                       break;
-               }
-       }
-
-       /* Leftover */
-       switch (state) {
-       case st_got_dot_dot:
-               /* Trailing .. */
-               if (slash) {
-                       /* We need to remove the last component from o if it is there */
-                       if (o > path + 2 && *(o - 1) == '/') {
-                               slash = rspamd_memrchr (path, '/', o - path - 2);
-                       }
-                       else if (o > path + 1) {
-                               slash = rspamd_memrchr (path, '/', o - path - 1);
-                       }
-                       else {
-                               if (o == path) {
-                                       /* Corner case */
-                                       *o++ = '/';
-                               }
-
-                               slash = NULL;
-                       }
-
-                       if (slash) {
-                               /* Remove last / */
-                               o = (gchar *)slash;
-                       }
-               }
-               else {
-                       /* Corner case */
-                       if (o == path) {
-                               *o++ = '/';
-                       }
-                       else {
-                               if (dot && p > dot) {
-                                       memmove (o, dot, p - dot);
-                                       o += p - dot;
-                               }
-                       }
-               }
-               break;
-       case st_got_dot:
-               if (slash) {
-                       /* /. -> must be / */
-                       *o++ = '/';
-               }
-               else {
-                       if (o > path) {
-                               *o++ = '.';
-                       }
-               }
-               break;
-       case st_got_slash:
-               *o++ = '/';
-               break;
-       default:
-#if 0
-               if (o > path + 1 && *(o - 1) == '/') {
-                       o --;
-               }
-#endif
-               break;
-       }
-
-       if (nlen) {
-               *nlen = (o - path);
-       }
 }
\ No newline at end of file
index 19b497f301746186c6bb8241d9a180d25b7b90de..3d8356c6de61b1bab79565a989fbe1cf1d14e60e 100644 (file)
@@ -40,15 +40,6 @@ time_t rspamd_http_parse_date (const gchar *header, gsize len);
  */
 glong rspamd_http_date_format (gchar *buf, gsize len, time_t time);
 
-/**
- * Normalize HTTP path removing dot sequences and repeating '/' symbols as
- * per rfc3986#section-5.2
- * @param path
- * @param len
- * @param nlen
- */
-void rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen);
-
 #ifdef  __cplusplus
 }
 #endif
index 6ec5f7c36b0147c4ed12b0f6d19fa54057147ec0..bb1c9ffbc24fc649fbd1e60ffec49e1f42d5e874 100644 (file)
@@ -140,7 +140,7 @@ public:
 
                auto mut_fname = std::string{fname};
                std::size_t sz;
-               rspamd_http_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz);
+               rspamd_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz);
                mut_fname.resize(sz);
                auto dir = hs_known_files_cache::get_dir(mut_fname);
                auto ext =  hs_known_files_cache::get_extension(mut_fname);
index 805e3d65de574f5d67c358d6f57d5196c5badb4f..7be9d020aa3582ede2f3b360705e1d416529d000 100644 (file)
@@ -2439,8 +2439,8 @@ rspamd_url_parse (struct rspamd_url *uri,
 
                rspamd_url_shift (uri, unquoted_len, UF_PATH);
                /* We now normalize path */
-               rspamd_http_normalize_path_inplace (rspamd_url_data_unsafe (uri),
-                               uri->datalen, &unquoted_len);
+               rspamd_normalize_path_inplace(rspamd_url_data_unsafe (uri),
+                       uri->datalen, &unquoted_len);
                rspamd_url_shift (uri, unquoted_len, UF_PATH);
        }
 
index 547669536fe81d6583b083810495671509d0aaa8..bc62bb9193bbd162f4bad151c99d877c96fad418 100644 (file)
@@ -2471,3 +2471,227 @@ rspamd_sum_floats (float *ar, gsize *nelts)
        *nelts = cnt;
        return sum;
 }
+
+void
+rspamd_normalize_path_inplace (gchar *path, guint len, gsize *nlen)
+{
+       const gchar *p, *end, *slash = NULL, *dot = NULL;
+       gchar *o;
+       enum {
+               st_normal = 0,
+               st_got_dot,
+               st_got_dot_dot,
+               st_got_slash,
+               st_got_slash_slash,
+       } state = st_normal;
+
+       p = path;
+       end = path + len;
+       o = path;
+
+       while (p < end) {
+               switch (state) {
+               case st_normal:
+                       if (G_UNLIKELY (*p == '/')) {
+                               state = st_got_slash;
+                               slash = p;
+                       }
+                       else if (G_UNLIKELY (*p == '.')) {
+                               state = st_got_dot;
+                               dot = p;
+                       }
+                       else {
+                               *o++ = *p;
+                       }
+                       p ++;
+                       break;
+               case st_got_slash:
+                       if (G_UNLIKELY (*p == '/')) {
+                               /* Ignore double slash */
+                               *o++ = *p;
+                               state = st_got_slash_slash;
+                       }
+                       else if (G_UNLIKELY (*p == '.')) {
+                               dot = p;
+                               state = st_got_dot;
+                       }
+                       else {
+                               *o++ = '/';
+                               *o++ = *p;
+                               slash = NULL;
+                               dot = NULL;
+                               state = st_normal;
+                       }
+                       p ++;
+                       break;
+               case st_got_slash_slash:
+                       if (G_LIKELY (*p != '/')) {
+                               slash = p - 1;
+                               dot = NULL;
+                               state = st_normal;
+                               continue;
+                       }
+                       p ++;
+                       break;
+               case st_got_dot:
+                       if (G_UNLIKELY (*p == '/')) {
+                               /* Remove any /./ or ./ paths */
+                               if (((o > path && *(o - 1) != '/') || (o == path)) && slash) {
+                                       /* Preserve one slash */
+                                       *o++ = '/';
+                               }
+
+                               slash = p;
+                               dot = NULL;
+                               /* Ignore last slash */
+                               state = st_normal;
+                       }
+                       else if (*p == '.') {
+                               /* Double dot character */
+                               state = st_got_dot_dot;
+                       }
+                       else {
+                               /* We have something like .some or /.some */
+                               if (dot && p > dot) {
+                                       if (slash == dot - 1 && (o > path && *(o - 1) != '/')) {
+                                               /* /.blah */
+                                               memmove (o, slash, p - slash);
+                                               o += p - slash;
+                                       }
+                                       else {
+                                               memmove (o, dot, p - dot);
+                                               o += p - dot;
+                                       }
+                               }
+
+                               slash = NULL;
+                               dot = NULL;
+                               state = st_normal;
+                               continue;
+                       }
+
+                       p ++;
+                       break;
+               case st_got_dot_dot:
+                       if (*p == '/') {
+                               /* We have something like /../ or ../ */
+                               if (slash) {
+                                       /* We need to remove the last component from o if it is there */
+                                       if (o > path + 2 && *(o - 1) == '/') {
+                                               slash = rspamd_memrchr (path, '/', o - path - 2);
+                                       }
+                                       else if (o > path + 1) {
+                                               slash = rspamd_memrchr (path, '/', o - path - 1);
+                                       }
+                                       else {
+                                               slash = NULL;
+                                       }
+
+                                       if (slash) {
+                                               o = (gchar *)slash;
+                                       }
+                                       /* Otherwise we keep these dots */
+                                       slash = p;
+                                       state = st_got_slash;
+                               }
+                               else {
+                                       /* We have something like bla../, so we need to copy it as is */
+                                       if (o > path && dot && p > dot) {
+                                               memmove (o, dot, p - dot);
+                                               o += p - dot;
+                                       }
+
+                                       slash = NULL;
+                                       dot = NULL;
+                                       state = st_normal;
+                                       continue;
+                               }
+                       }
+                       else {
+                               /* We have something like ..bla or ... */
+                               if (slash) {
+                                       *o ++ = '/';
+                               }
+
+                               if (dot && p > dot) {
+                                       memmove (o, dot, p - dot);
+                                       o += p - dot;
+                               }
+
+                               slash = NULL;
+                               dot = NULL;
+                               state = st_normal;
+                               continue;
+                       }
+
+                       p ++;
+                       break;
+               }
+       }
+
+       /* Leftover */
+       switch (state) {
+       case st_got_dot_dot:
+               /* Trailing .. */
+               if (slash) {
+                       /* We need to remove the last component from o if it is there */
+                       if (o > path + 2 && *(o - 1) == '/') {
+                               slash = rspamd_memrchr (path, '/', o - path - 2);
+                       }
+                       else if (o > path + 1) {
+                               slash = rspamd_memrchr (path, '/', o - path - 1);
+                       }
+                       else {
+                               if (o == path) {
+                                       /* Corner case */
+                                       *o++ = '/';
+                               }
+
+                               slash = NULL;
+                       }
+
+                       if (slash) {
+                               /* Remove last / */
+                               o = (gchar *)slash;
+                       }
+               }
+               else {
+                       /* Corner case */
+                       if (o == path) {
+                               *o++ = '/';
+                       }
+                       else {
+                               if (dot && p > dot) {
+                                       memmove (o, dot, p - dot);
+                                       o += p - dot;
+                               }
+                       }
+               }
+               break;
+       case st_got_dot:
+               if (slash) {
+                       /* /. -> must be / */
+                       *o++ = '/';
+               }
+               else {
+                       if (o > path) {
+                               *o++ = '.';
+                       }
+               }
+               break;
+       case st_got_slash:
+               *o++ = '/';
+               break;
+       default:
+#if 0
+               if (o > path + 1 && *(o - 1) == '/') {
+                       o --;
+               }
+#endif
+               break;
+       }
+
+       if (nlen) {
+               *nlen = (o - path);
+       }
+}
index f9be15d284f8c8b881c2cd682786346e8e77c091..f747bce5bb16ea02663dcbf4e065c7ff1791aa6a 100644 (file)
@@ -526,6 +526,15 @@ extern const struct rspamd_controller_pbkdf pbkdf_list[];
  */
 float rspamd_sum_floats (float *ar, gsize *nelts);
 
+/**
+ * Normalize file path removing dot sequences and repeating '/' symbols as
+ * per rfc3986#section-5.2
+ * @param path
+ * @param len
+ * @param nlen
+ */
+void rspamd_normalize_path_inplace (gchar *path, guint len, gsize *nlen);
+
 #ifdef  __cplusplus
 }
 #endif
index 2016cc6f4c73d12d4e5b2e62079a8ab0f544aad1..46eeef277ab4c30f294424108ec10d840b0ecc52 100644 (file)
@@ -10,7 +10,7 @@ context("URL check functions", function()
   local ffi = require("ffi")
 
   ffi.cdef[[
-  void rspamd_http_normalize_path_inplace(char *path, size_t len, size_t *nlen);
+  void rspamd_normalize_path_inplace(char *path, size_t len, size_t *nlen);
   ]]
 
   test_helper.init_url_parser()