From 96b94707c1c6fde1cc2aa06522587114c5c6c809 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sun, 23 Oct 2022 21:41:18 +0100 Subject: [PATCH] [Rework] Make http normalize path function a generic function --- src/controller.c | 12 +- src/libserver/http/http_router.c | 6 +- src/libserver/http/http_util.c | 224 ------------------------------ src/libserver/http/http_util.h | 9 -- src/libserver/hyperscan_tools.cxx | 2 +- src/libserver/url.c | 4 +- src/libutil/util.c | 224 ++++++++++++++++++++++++++++++ src/libutil/util.h | 9 ++ test/lua/unit/url.lua | 2 +- 9 files changed, 246 insertions(+), 246 deletions(-) diff --git a/src/controller.c b/src/controller.c index e695d86a4..0ff7d64c0 100644 --- a/src/controller.c +++ b/src/controller.c @@ -3287,9 +3287,9 @@ rspamd_controller_handle_custom (struct rspamd_http_connection_entry *conn_ent, lookup.begin = msg->url->str + u.field_data[UF_PATH].off; lookup.len = u.field_data[UF_PATH].len; - rspamd_http_normalize_path_inplace ((gchar *)lookup.begin, - lookup.len, - &unnorm_len); + rspamd_normalize_path_inplace((gchar *) lookup.begin, + lookup.len, + &unnorm_len); lookup.len = unnorm_len; } else { @@ -3494,9 +3494,9 @@ rspamd_controller_handle_lua_plugin (struct rspamd_http_connection_entry *conn_e lookup.begin = msg->url->str + u.field_data[UF_PATH].off; lookup.len = u.field_data[UF_PATH].len; - rspamd_http_normalize_path_inplace ((gchar *)lookup.begin, - lookup.len, - &unnorm_len); + rspamd_normalize_path_inplace((gchar *) lookup.begin, + lookup.len, + &unnorm_len); lookup.len = unnorm_len; } else { diff --git a/src/libserver/http/http_router.c b/src/libserver/http/http_router.c index 5c4990ab6..a70ea223f 100644 --- a/src/libserver/http/http_router.c +++ b/src/libserver/http/http_router.c @@ -302,9 +302,9 @@ rspamd_http_router_finish_handler (struct rspamd_http_connection *conn, lookup.begin = pathbuf; lookup.len = u.field_data[UF_PATH].len; - rspamd_http_normalize_path_inplace (pathbuf, - lookup.len, - &unnorm_len); + rspamd_normalize_path_inplace(pathbuf, + lookup.len, + &unnorm_len); lookup.len = unnorm_len; } else { diff --git a/src/libserver/http/http_util.c b/src/libserver/http/http_util.c index fd5adb3c1..c9035375b 100644 --- a/src/libserver/http/http_util.c +++ b/src/libserver/http/http_util.c @@ -299,228 +299,4 @@ rspamd_http_date_format (gchar *buf, gsize len, time_t time) http_week[tms.tm_wday], tms.tm_mday, http_month[tms.tm_mon], tms.tm_year + 1900, tms.tm_hour, tms.tm_min, tms.tm_sec); -} - -void -rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen) -{ - const gchar *p, *end, *slash = NULL, *dot = NULL; - gchar *o; - enum { - st_normal = 0, - st_got_dot, - st_got_dot_dot, - st_got_slash, - st_got_slash_slash, - } state = st_normal; - - p = path; - end = path + len; - o = path; - - while (p < end) { - switch (state) { - case st_normal: - if (G_UNLIKELY (*p == '/')) { - state = st_got_slash; - slash = p; - } - else if (G_UNLIKELY (*p == '.')) { - state = st_got_dot; - dot = p; - } - else { - *o++ = *p; - } - p ++; - break; - case st_got_slash: - if (G_UNLIKELY (*p == '/')) { - /* Ignore double slash */ - *o++ = *p; - state = st_got_slash_slash; - } - else if (G_UNLIKELY (*p == '.')) { - dot = p; - state = st_got_dot; - } - else { - *o++ = '/'; - *o++ = *p; - slash = NULL; - dot = NULL; - state = st_normal; - } - p ++; - break; - case st_got_slash_slash: - if (G_LIKELY (*p != '/')) { - slash = p - 1; - dot = NULL; - state = st_normal; - continue; - } - p ++; - break; - case st_got_dot: - if (G_UNLIKELY (*p == '/')) { - /* Remove any /./ or ./ paths */ - if (((o > path && *(o - 1) != '/') || (o == path)) && slash) { - /* Preserve one slash */ - *o++ = '/'; - } - - slash = p; - dot = NULL; - /* Ignore last slash */ - state = st_normal; - } - else if (*p == '.') { - /* Double dot character */ - state = st_got_dot_dot; - } - else { - /* We have something like .some or /.some */ - if (dot && p > dot) { - if (slash == dot - 1 && (o > path && *(o - 1) != '/')) { - /* /.blah */ - memmove (o, slash, p - slash); - o += p - slash; - } - else { - memmove (o, dot, p - dot); - o += p - dot; - } - } - - slash = NULL; - dot = NULL; - state = st_normal; - continue; - } - - p ++; - break; - case st_got_dot_dot: - if (*p == '/') { - /* We have something like /../ or ../ */ - if (slash) { - /* We need to remove the last component from o if it is there */ - if (o > path + 2 && *(o - 1) == '/') { - slash = rspamd_memrchr (path, '/', o - path - 2); - } - else if (o > path + 1) { - slash = rspamd_memrchr (path, '/', o - path - 1); - } - else { - slash = NULL; - } - - if (slash) { - o = (gchar *)slash; - } - /* Otherwise we keep these dots */ - slash = p; - state = st_got_slash; - } - else { - /* We have something like bla../, so we need to copy it as is */ - if (o > path && dot && p > dot) { - memmove (o, dot, p - dot); - o += p - dot; - } - - slash = NULL; - dot = NULL; - state = st_normal; - continue; - } - } - else { - /* We have something like ..bla or ... */ - if (slash) { - *o ++ = '/'; - } - - if (dot && p > dot) { - memmove (o, dot, p - dot); - o += p - dot; - } - - slash = NULL; - dot = NULL; - state = st_normal; - continue; - } - - p ++; - break; - } - } - - /* Leftover */ - switch (state) { - case st_got_dot_dot: - /* Trailing .. */ - if (slash) { - /* We need to remove the last component from o if it is there */ - if (o > path + 2 && *(o - 1) == '/') { - slash = rspamd_memrchr (path, '/', o - path - 2); - } - else if (o > path + 1) { - slash = rspamd_memrchr (path, '/', o - path - 1); - } - else { - if (o == path) { - /* Corner case */ - *o++ = '/'; - } - - slash = NULL; - } - - if (slash) { - /* Remove last / */ - o = (gchar *)slash; - } - } - else { - /* Corner case */ - if (o == path) { - *o++ = '/'; - } - else { - if (dot && p > dot) { - memmove (o, dot, p - dot); - o += p - dot; - } - } - } - break; - case st_got_dot: - if (slash) { - /* /. -> must be / */ - *o++ = '/'; - } - else { - if (o > path) { - *o++ = '.'; - } - } - break; - case st_got_slash: - *o++ = '/'; - break; - default: -#if 0 - if (o > path + 1 && *(o - 1) == '/') { - o --; - } -#endif - break; - } - - if (nlen) { - *nlen = (o - path); - } } \ No newline at end of file diff --git a/src/libserver/http/http_util.h b/src/libserver/http/http_util.h index 19b497f30..3d8356c6d 100644 --- a/src/libserver/http/http_util.h +++ b/src/libserver/http/http_util.h @@ -40,15 +40,6 @@ time_t rspamd_http_parse_date (const gchar *header, gsize len); */ glong rspamd_http_date_format (gchar *buf, gsize len, time_t time); -/** - * Normalize HTTP path removing dot sequences and repeating '/' symbols as - * per rfc3986#section-5.2 - * @param path - * @param len - * @param nlen - */ -void rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen); - #ifdef __cplusplus } #endif diff --git a/src/libserver/hyperscan_tools.cxx b/src/libserver/hyperscan_tools.cxx index 6ec5f7c36..bb1c9ffbc 100644 --- a/src/libserver/hyperscan_tools.cxx +++ b/src/libserver/hyperscan_tools.cxx @@ -140,7 +140,7 @@ public: auto mut_fname = std::string{fname}; std::size_t sz; - rspamd_http_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz); + rspamd_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz); mut_fname.resize(sz); auto dir = hs_known_files_cache::get_dir(mut_fname); auto ext = hs_known_files_cache::get_extension(mut_fname); diff --git a/src/libserver/url.c b/src/libserver/url.c index 805e3d65d..7be9d020a 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -2439,8 +2439,8 @@ rspamd_url_parse (struct rspamd_url *uri, rspamd_url_shift (uri, unquoted_len, UF_PATH); /* We now normalize path */ - rspamd_http_normalize_path_inplace (rspamd_url_data_unsafe (uri), - uri->datalen, &unquoted_len); + rspamd_normalize_path_inplace(rspamd_url_data_unsafe (uri), + uri->datalen, &unquoted_len); rspamd_url_shift (uri, unquoted_len, UF_PATH); } diff --git a/src/libutil/util.c b/src/libutil/util.c index 547669536..bc62bb919 100644 --- a/src/libutil/util.c +++ b/src/libutil/util.c @@ -2471,3 +2471,227 @@ rspamd_sum_floats (float *ar, gsize *nelts) *nelts = cnt; return sum; } + +void +rspamd_normalize_path_inplace (gchar *path, guint len, gsize *nlen) +{ + const gchar *p, *end, *slash = NULL, *dot = NULL; + gchar *o; + enum { + st_normal = 0, + st_got_dot, + st_got_dot_dot, + st_got_slash, + st_got_slash_slash, + } state = st_normal; + + p = path; + end = path + len; + o = path; + + while (p < end) { + switch (state) { + case st_normal: + if (G_UNLIKELY (*p == '/')) { + state = st_got_slash; + slash = p; + } + else if (G_UNLIKELY (*p == '.')) { + state = st_got_dot; + dot = p; + } + else { + *o++ = *p; + } + p ++; + break; + case st_got_slash: + if (G_UNLIKELY (*p == '/')) { + /* Ignore double slash */ + *o++ = *p; + state = st_got_slash_slash; + } + else if (G_UNLIKELY (*p == '.')) { + dot = p; + state = st_got_dot; + } + else { + *o++ = '/'; + *o++ = *p; + slash = NULL; + dot = NULL; + state = st_normal; + } + p ++; + break; + case st_got_slash_slash: + if (G_LIKELY (*p != '/')) { + slash = p - 1; + dot = NULL; + state = st_normal; + continue; + } + p ++; + break; + case st_got_dot: + if (G_UNLIKELY (*p == '/')) { + /* Remove any /./ or ./ paths */ + if (((o > path && *(o - 1) != '/') || (o == path)) && slash) { + /* Preserve one slash */ + *o++ = '/'; + } + + slash = p; + dot = NULL; + /* Ignore last slash */ + state = st_normal; + } + else if (*p == '.') { + /* Double dot character */ + state = st_got_dot_dot; + } + else { + /* We have something like .some or /.some */ + if (dot && p > dot) { + if (slash == dot - 1 && (o > path && *(o - 1) != '/')) { + /* /.blah */ + memmove (o, slash, p - slash); + o += p - slash; + } + else { + memmove (o, dot, p - dot); + o += p - dot; + } + } + + slash = NULL; + dot = NULL; + state = st_normal; + continue; + } + + p ++; + break; + case st_got_dot_dot: + if (*p == '/') { + /* We have something like /../ or ../ */ + if (slash) { + /* We need to remove the last component from o if it is there */ + if (o > path + 2 && *(o - 1) == '/') { + slash = rspamd_memrchr (path, '/', o - path - 2); + } + else if (o > path + 1) { + slash = rspamd_memrchr (path, '/', o - path - 1); + } + else { + slash = NULL; + } + + if (slash) { + o = (gchar *)slash; + } + /* Otherwise we keep these dots */ + slash = p; + state = st_got_slash; + } + else { + /* We have something like bla../, so we need to copy it as is */ + if (o > path && dot && p > dot) { + memmove (o, dot, p - dot); + o += p - dot; + } + + slash = NULL; + dot = NULL; + state = st_normal; + continue; + } + } + else { + /* We have something like ..bla or ... */ + if (slash) { + *o ++ = '/'; + } + + if (dot && p > dot) { + memmove (o, dot, p - dot); + o += p - dot; + } + + slash = NULL; + dot = NULL; + state = st_normal; + continue; + } + + p ++; + break; + } + } + + /* Leftover */ + switch (state) { + case st_got_dot_dot: + /* Trailing .. */ + if (slash) { + /* We need to remove the last component from o if it is there */ + if (o > path + 2 && *(o - 1) == '/') { + slash = rspamd_memrchr (path, '/', o - path - 2); + } + else if (o > path + 1) { + slash = rspamd_memrchr (path, '/', o - path - 1); + } + else { + if (o == path) { + /* Corner case */ + *o++ = '/'; + } + + slash = NULL; + } + + if (slash) { + /* Remove last / */ + o = (gchar *)slash; + } + } + else { + /* Corner case */ + if (o == path) { + *o++ = '/'; + } + else { + if (dot && p > dot) { + memmove (o, dot, p - dot); + o += p - dot; + } + } + } + break; + case st_got_dot: + if (slash) { + /* /. -> must be / */ + *o++ = '/'; + } + else { + if (o > path) { + *o++ = '.'; + } + } + break; + case st_got_slash: + *o++ = '/'; + break; + default: +#if 0 + if (o > path + 1 && *(o - 1) == '/') { + o --; + } +#endif + break; + } + + if (nlen) { + *nlen = (o - path); + } +} diff --git a/src/libutil/util.h b/src/libutil/util.h index f9be15d28..f747bce5b 100644 --- a/src/libutil/util.h +++ b/src/libutil/util.h @@ -526,6 +526,15 @@ extern const struct rspamd_controller_pbkdf pbkdf_list[]; */ float rspamd_sum_floats (float *ar, gsize *nelts); +/** + * Normalize file path removing dot sequences and repeating '/' symbols as + * per rfc3986#section-5.2 + * @param path + * @param len + * @param nlen + */ +void rspamd_normalize_path_inplace (gchar *path, guint len, gsize *nlen); + #ifdef __cplusplus } #endif diff --git a/test/lua/unit/url.lua b/test/lua/unit/url.lua index 2016cc6f4..46eeef277 100644 --- a/test/lua/unit/url.lua +++ b/test/lua/unit/url.lua @@ -10,7 +10,7 @@ context("URL check functions", function() local ffi = require("ffi") ffi.cdef[[ - void rspamd_http_normalize_path_inplace(char *path, size_t len, size_t *nlen); + void rspamd_normalize_path_inplace(char *path, size_t len, size_t *nlen); ]] test_helper.init_url_parser() -- 2.39.5