lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
lookup.len = u.field_data[UF_PATH].len;
- rspamd_http_normalize_path_inplace ((gchar *)lookup.begin,
- lookup.len,
- &unnorm_len);
+ rspamd_normalize_path_inplace((gchar *) lookup.begin,
+ lookup.len,
+ &unnorm_len);
lookup.len = unnorm_len;
}
else {
lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
lookup.len = u.field_data[UF_PATH].len;
- rspamd_http_normalize_path_inplace ((gchar *)lookup.begin,
- lookup.len,
- &unnorm_len);
+ rspamd_normalize_path_inplace((gchar *) lookup.begin,
+ lookup.len,
+ &unnorm_len);
lookup.len = unnorm_len;
}
else {
lookup.begin = pathbuf;
lookup.len = u.field_data[UF_PATH].len;
- rspamd_http_normalize_path_inplace (pathbuf,
- lookup.len,
- &unnorm_len);
+ rspamd_normalize_path_inplace(pathbuf,
+ lookup.len,
+ &unnorm_len);
lookup.len = unnorm_len;
}
else {
http_week[tms.tm_wday], tms.tm_mday,
http_month[tms.tm_mon], tms.tm_year + 1900,
tms.tm_hour, tms.tm_min, tms.tm_sec);
-}
-
-void
-rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen)
-{
- const gchar *p, *end, *slash = NULL, *dot = NULL;
- gchar *o;
- enum {
- st_normal = 0,
- st_got_dot,
- st_got_dot_dot,
- st_got_slash,
- st_got_slash_slash,
- } state = st_normal;
-
- p = path;
- end = path + len;
- o = path;
-
- while (p < end) {
- switch (state) {
- case st_normal:
- if (G_UNLIKELY (*p == '/')) {
- state = st_got_slash;
- slash = p;
- }
- else if (G_UNLIKELY (*p == '.')) {
- state = st_got_dot;
- dot = p;
- }
- else {
- *o++ = *p;
- }
- p ++;
- break;
- case st_got_slash:
- if (G_UNLIKELY (*p == '/')) {
- /* Ignore double slash */
- *o++ = *p;
- state = st_got_slash_slash;
- }
- else if (G_UNLIKELY (*p == '.')) {
- dot = p;
- state = st_got_dot;
- }
- else {
- *o++ = '/';
- *o++ = *p;
- slash = NULL;
- dot = NULL;
- state = st_normal;
- }
- p ++;
- break;
- case st_got_slash_slash:
- if (G_LIKELY (*p != '/')) {
- slash = p - 1;
- dot = NULL;
- state = st_normal;
- continue;
- }
- p ++;
- break;
- case st_got_dot:
- if (G_UNLIKELY (*p == '/')) {
- /* Remove any /./ or ./ paths */
- if (((o > path && *(o - 1) != '/') || (o == path)) && slash) {
- /* Preserve one slash */
- *o++ = '/';
- }
-
- slash = p;
- dot = NULL;
- /* Ignore last slash */
- state = st_normal;
- }
- else if (*p == '.') {
- /* Double dot character */
- state = st_got_dot_dot;
- }
- else {
- /* We have something like .some or /.some */
- if (dot && p > dot) {
- if (slash == dot - 1 && (o > path && *(o - 1) != '/')) {
- /* /.blah */
- memmove (o, slash, p - slash);
- o += p - slash;
- }
- else {
- memmove (o, dot, p - dot);
- o += p - dot;
- }
- }
-
- slash = NULL;
- dot = NULL;
- state = st_normal;
- continue;
- }
-
- p ++;
- break;
- case st_got_dot_dot:
- if (*p == '/') {
- /* We have something like /../ or ../ */
- if (slash) {
- /* We need to remove the last component from o if it is there */
- if (o > path + 2 && *(o - 1) == '/') {
- slash = rspamd_memrchr (path, '/', o - path - 2);
- }
- else if (o > path + 1) {
- slash = rspamd_memrchr (path, '/', o - path - 1);
- }
- else {
- slash = NULL;
- }
-
- if (slash) {
- o = (gchar *)slash;
- }
- /* Otherwise we keep these dots */
- slash = p;
- state = st_got_slash;
- }
- else {
- /* We have something like bla../, so we need to copy it as is */
- if (o > path && dot && p > dot) {
- memmove (o, dot, p - dot);
- o += p - dot;
- }
-
- slash = NULL;
- dot = NULL;
- state = st_normal;
- continue;
- }
- }
- else {
- /* We have something like ..bla or ... */
- if (slash) {
- *o ++ = '/';
- }
-
- if (dot && p > dot) {
- memmove (o, dot, p - dot);
- o += p - dot;
- }
-
- slash = NULL;
- dot = NULL;
- state = st_normal;
- continue;
- }
-
- p ++;
- break;
- }
- }
-
- /* Leftover */
- switch (state) {
- case st_got_dot_dot:
- /* Trailing .. */
- if (slash) {
- /* We need to remove the last component from o if it is there */
- if (o > path + 2 && *(o - 1) == '/') {
- slash = rspamd_memrchr (path, '/', o - path - 2);
- }
- else if (o > path + 1) {
- slash = rspamd_memrchr (path, '/', o - path - 1);
- }
- else {
- if (o == path) {
- /* Corner case */
- *o++ = '/';
- }
-
- slash = NULL;
- }
-
- if (slash) {
- /* Remove last / */
- o = (gchar *)slash;
- }
- }
- else {
- /* Corner case */
- if (o == path) {
- *o++ = '/';
- }
- else {
- if (dot && p > dot) {
- memmove (o, dot, p - dot);
- o += p - dot;
- }
- }
- }
- break;
- case st_got_dot:
- if (slash) {
- /* /. -> must be / */
- *o++ = '/';
- }
- else {
- if (o > path) {
- *o++ = '.';
- }
- }
- break;
- case st_got_slash:
- *o++ = '/';
- break;
- default:
-#if 0
- if (o > path + 1 && *(o - 1) == '/') {
- o --;
- }
-#endif
- break;
- }
-
- if (nlen) {
- *nlen = (o - path);
- }
}
\ No newline at end of file
*/
glong rspamd_http_date_format (gchar *buf, gsize len, time_t time);
-/**
- * Normalize HTTP path removing dot sequences and repeating '/' symbols as
- * per rfc3986#section-5.2
- * @param path
- * @param len
- * @param nlen
- */
-void rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen);
-
#ifdef __cplusplus
}
#endif
auto mut_fname = std::string{fname};
std::size_t sz;
- rspamd_http_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz);
+ rspamd_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz);
mut_fname.resize(sz);
auto dir = hs_known_files_cache::get_dir(mut_fname);
auto ext = hs_known_files_cache::get_extension(mut_fname);
rspamd_url_shift (uri, unquoted_len, UF_PATH);
/* We now normalize path */
- rspamd_http_normalize_path_inplace (rspamd_url_data_unsafe (uri),
- uri->datalen, &unquoted_len);
+ rspamd_normalize_path_inplace(rspamd_url_data_unsafe (uri),
+ uri->datalen, &unquoted_len);
rspamd_url_shift (uri, unquoted_len, UF_PATH);
}
*nelts = cnt;
return sum;
}
+/**
+ * Normalize a path in place: runs of '/' are collapsed and "." / ".."
+ * segments are resolved by a single-pass state machine that reads at `p` and
+ * writes at `o` inside the same buffer.
+ * @param path buffer to rewrite; no terminator is written by this function
+ * @param len number of bytes of `path` to process
+ * @param nlen if not NULL, receives the length of the normalized result
+ */
+void
+rspamd_normalize_path_inplace (gchar *path, guint len, gsize *nlen)
+{
+ const gchar *p, *end, *slash = NULL, *dot = NULL; /* slash/dot: start of input that was seen but not yet written */
+ gchar *o;
+ enum {
+ st_normal = 0, /* copying ordinary bytes */
+ st_got_dot, /* one '.' seen, not yet written */
+ st_got_dot_dot, /* ".." seen, not yet written */
+ st_got_slash, /* one '/' seen, not yet written */
+ st_got_slash_slash, /* inside a run of '/', a single '/' was already written */
+ } state = st_normal;
+
+ p = path; /* read cursor */
+ end = path + len;
+ o = path; /* write cursor */
+
+ while (p < end) {
+ switch (state) {
+ case st_normal:
+ if (G_UNLIKELY (*p == '/')) {
+ state = st_got_slash;
+ slash = p; /* defer the write until the next byte is known */
+ }
+ else if (G_UNLIKELY (*p == '.')) {
+ state = st_got_dot;
+ dot = p; /* defer the write until the next byte is known */
+ }
+ else {
+ *o++ = *p;
+ }
+ p ++;
+ break;
+ case st_got_slash:
+ if (G_UNLIKELY (*p == '/')) {
+ /* Run of slashes: write a single '/' now, the rest of the run is skipped */
+ *o++ = *p;
+ state = st_got_slash_slash;
+ }
+ else if (G_UNLIKELY (*p == '.')) {
+ dot = p; /* `slash` stays set, so the dot states know a '/' precedes the dot */
+ state = st_got_dot;
+ }
+ else {
+ *o++ = '/'; /* flush the deferred slash, then the current byte */
+ *o++ = *p;
+ slash = NULL;
+ dot = NULL;
+ state = st_normal;
+ }
+ p ++;
+ break;
+ case st_got_slash_slash:
+ if (G_LIKELY (*p != '/')) {
+ slash = p - 1; /* last '/' of the run */
+ dot = NULL;
+ state = st_normal;
+ continue; /* re-examine the same byte in st_normal without advancing p */
+ }
+ p ++;
+ break;
+ case st_got_dot:
+ if (G_UNLIKELY (*p == '/')) {
+ /* Input was "/./" or "./": the dot segment itself is dropped */
+ if (((o > path && *(o - 1) != '/') || (o == path)) && slash) {
+ /* A '/' preceded the dot and the output does not end with one yet: write exactly one */
+ *o++ = '/';
+ }
+
+ slash = p;
+ dot = NULL;
+ /* The current '/' is consumed without being written */
+ state = st_normal;
+ }
+ else if (*p == '.') {
+ /* Second consecutive dot */
+ state = st_got_dot_dot;
+ }
+ else {
+ /* Input is like ".some" or "/.some": the dot is part of a name, so flush what was deferred */
+ if (dot && p > dot) {
+ if (slash == dot - 1 && (o > path && *(o - 1) != '/')) {
+ /* "/.blah": copy starting at the deferred '/' so it is kept */
+ memmove (o, slash, p - slash);
+ o += p - slash;
+ }
+ else {
+ memmove (o, dot, p - dot); /* copy only the dot */
+ o += p - dot;
+ }
+ }
+
+ slash = NULL;
+ dot = NULL;
+ state = st_normal;
+ continue; /* re-examine the same byte in st_normal without advancing p */
+ }
+
+ p ++;
+ break;
+ case st_got_dot_dot:
+ if (*p == '/') {
+ /* Input is like "/../" or "../" */
+ if (slash) {
+ /* Drop the last written component: look for an earlier '/' in the output (rspamd_memrchr is presumably memrchr-like - confirm) */
+ if (o > path + 2 && *(o - 1) == '/') {
+ slash = rspamd_memrchr (path, '/', o - path - 2);
+ }
+ else if (o > path + 1) {
+ slash = rspamd_memrchr (path, '/', o - path - 1);
+ }
+ else {
+ slash = NULL;
+ }
+
+ if (slash) {
+ o = (gchar *)slash; /* rewind the write cursor to the slash that was found */
+ }
+ /* If no earlier slash was found, the write cursor is left where it is */
+ slash = p;
+ state = st_got_slash;
+ }
+ else {
+ /* Input is like "bla../": the dots belong to a name and are copied as is */
+ if (o > path && dot && p > dot) {
+ memmove (o, dot, p - dot);
+ o += p - dot;
+ }
+
+ slash = NULL;
+ dot = NULL;
+ state = st_normal;
+ continue; /* the '/' at p is handled by st_normal on the next iteration */
+ }
+ }
+ else {
+ /* Input is like "..bla" or "...": not a parent reference, flush the deferred bytes */
+ if (slash) {
+ *o ++ = '/';
+ }
+
+ if (dot && p > dot) {
+ memmove (o, dot, p - dot);
+ o += p - dot;
+ }
+
+ slash = NULL;
+ dot = NULL;
+ state = st_normal;
+ continue; /* re-examine the same byte in st_normal without advancing p */
+ }
+
+ p ++;
+ break;
+ }
+ }
+
+ /* Input is exhausted: flush or resolve whatever the final state left pending */
+ switch (state) {
+ case st_got_dot_dot:
+ /* Input ended with ".." */
+ if (slash) {
+ /* Drop the last written component, same search as in the loop above */
+ if (o > path + 2 && *(o - 1) == '/') {
+ slash = rspamd_memrchr (path, '/', o - path - 2);
+ }
+ else if (o > path + 1) {
+ slash = rspamd_memrchr (path, '/', o - path - 1);
+ }
+ else {
+ if (o == path) {
+ /* Nothing was written at all: the result is a single '/' */
+ *o++ = '/';
+ }
+
+ slash = NULL;
+ }
+
+ if (slash) {
+ /* Truncate the output at the slash that was found */
+ o = (gchar *)slash;
+ }
+ }
+ else {
+ /* No '/' before the dots: an empty output becomes a single '/' */
+ if (o == path) {
+ *o++ = '/';
+ }
+ else {
+ if (dot && p > dot) {
+ memmove (o, dot, p - dot); /* dots trail a name, keep them */
+ o += p - dot;
+ }
+ }
+ }
+ break;
+ case st_got_dot:
+ if (slash) {
+ /* Trailing "/." is written as '/' */
+ *o++ = '/';
+ }
+ else {
+ if (o > path) {
+ *o++ = '.'; /* dot trailing a name is kept; a lone "." input yields empty output */
+ }
+ }
+ break;
+ case st_got_slash:
+ *o++ = '/'; /* flush the deferred trailing slash */
+ break;
+ default:
+#if 0
+ if (o > path + 1 && *(o - 1) == '/') {
+ o --;
+ }
+#endif
+ break;
+ }
+
+ if (nlen) {
+ *nlen = (o - path); /* number of bytes written to the buffer */
+ }
+}
*/
float rspamd_sum_floats (float *ar, gsize *nelts);
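+/**
+ * Normalize a path in place, removing dot sequences and repeating '/' symbols
+ * as per rfc3986#section-5.2
+ * @param path buffer holding the path; it is rewritten in place and no terminator is added
+ * @param len number of bytes of `path` to process
+ * @param nlen if not NULL, receives the length of the normalized path
+ */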
+void rspamd_normalize_path_inplace (gchar *path, guint len, gsize *nlen);
+
#ifdef __cplusplus
}
#endif
local ffi = require("ffi")
ffi.cdef[[
- void rspamd_http_normalize_path_inplace(char *path, size_t len, size_t *nlen);
+ void rspamd_normalize_path_inplace(char *path, size_t len, size_t *nlen);
]]
test_helper.init_url_parser()