From e0befa616f7f0fdfc823b8a442f398e8c649cd95 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 25 Nov 2023 13:42:03 +0000 Subject: [PATCH] [Test] Add unit tests for unfolding --- src/libmime/mime_headers.c | 74 +++++++++++++++++++++++++++++++++- src/libmime/mime_headers.h | 8 ++++ test/rspamd_cxx_unit_utils.hxx | 51 +++++++++++++++++++++-- 3 files changed, 129 insertions(+), 4 deletions(-) diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c index 2b6b2acc0..e250e84f1 100644 --- a/src/libmime/mime_headers.c +++ b/src/libmime/mime_headers.c @@ -1025,6 +1025,76 @@ rspamd_message_headers_new(void) return nhdrs; } +gsize rspamd_message_header_unfold_inplace(char *hdr, gsize len) +{ + /* + * t - tortoise (destination) + * h - hare (source) + */ + char *t = hdr, *h = hdr, *end = (hdr + len); + enum { + copy_chars, + folding_cr, + folding_lf, + folding_ws, + } state = copy_chars; + + while (h < end) { + switch (state) { + case copy_chars: + if (*h == '\r') { + state = folding_cr; + h++; + } + else if (*h == '\n') { + state = folding_lf; + h++; + } + else { + *t++ = *h++; + } + break; + case folding_cr: + if (*h == '\n') { + state = folding_lf; + h++; + } + else if (g_ascii_isspace(*h)) { + state = folding_ws; + h++; + } + else { + /* It is weird, not like a folding, so we need to revert back */ + *t++ = '\r'; + state = copy_chars; + } + break; + case folding_lf: + if (g_ascii_isspace(*h)) { + state = folding_ws; + h++; + } + else { + /* It is weird, not like a folding, so we need to revert back */ + *t++ = '\n'; + state = copy_chars; + } + break; + case folding_ws: + if (!g_ascii_isspace(*h)) { + *t++ = ' '; + state = copy_chars; + } + else { + h++; + } + break; + } + } + + return t - hdr; +} + void rspamd_message_set_modified_header(struct rspamd_task *task, struct rspamd_mime_headers_table *hdrs, const gchar *hdr_name, @@ -1201,8 +1271,10 @@ void rspamd_message_set_modified_header(struct rspamd_task *task, nhdr->name = hdr_elt->name; 
nhdr->value = rspamd_mempool_alloc(task->task_pool, raw_len + 1); + /* Strlcpy will ensure that value will have no embedded \0 */ rspamd_strlcpy(nhdr->value, raw_value, raw_len + 1); - /* TODO: unfold header value, sigh */ + gsize value_len = rspamd_message_header_unfold_inplace(nhdr->value, raw_len); + nhdr->value[value_len] = '\0'; /* Deal with the raw value */ size_t namelen = strlen(hdr_elt->name); diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h index ffa863e8b..60015a20e 100644 --- a/src/libmime/mime_headers.h +++ b/src/libmime/mime_headers.h @@ -185,6 +185,14 @@ bool rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *, */ gsize rspamd_strip_smtp_comments_inplace(gchar *input, gsize len); +/** + * Unfold header in place + * @param hdr header value + * @param len length of the header + * @return new unfolded length + */ +gsize rspamd_message_header_unfold_inplace(char *hdr, gsize len); + #ifdef __cplusplus } #endif diff --git a/test/rspamd_cxx_unit_utils.hxx b/test/rspamd_cxx_unit_utils.hxx index 3e53a6d33..126253fd6 100644 --- a/test/rspamd_cxx_unit_utils.hxx +++ b/test/rspamd_cxx_unit_utils.hxx @@ -1,11 +1,11 @@ -/*- - * Copyright 2021 Vsevolod Stakhov +/* + * Copyright 2023 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -159,6 +159,51 @@ TEST_SUITE("rspamd_utils") rspamd_fstring_free(fstr); } } + + TEST_CASE("rspamd_message_header_unfold_inplace") + { + std::vector<std::pair<std::string, std::string>> cases{ + {"abc", "abc"}, + {"abc\r\n def", "abc def"}, + {"abc\r\n\tdef", "abc def"}, + {"abc\r\n\tdef\r\n\tghi", "abc def ghi"}, + {"abc\r\n\tdef\r\n\tghi\r\n", "abc def ghi"}, + {"abc\r\n\tdef\r\n\tghi\r\n\t", "abc def ghi"}, + {"abc\r\n\tdef\r\n\tghi\r\n\tjkl", "abc def ghi jkl"}, + {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n", "abc def ghi jkl"}, + {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\t", "abc def ghi jkl"}, + {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno", "abc def ghi jkl mno"}, + {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n", "abc def ghi jkl mno"}, + {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\t", "abc def ghi jkl mno"}, + {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr", "abc def ghi jkl mno pqr"}, + {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n", "abc def ghi jkl mno pqr"}, + {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\t", "abc def ghi jkl mno pqr"}, + {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu", "abc def ghi jkl mno pqr stu"}, + // Newline at the end + { + "abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu\r\n", "abc def ghi jkl mno pqr stu"}, + // Spaces at the end + { + "abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu\r\n\t", "abc def ghi jkl mno pqr stu"}, + // Multiple spaces at the end + { + "abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu\r\n\t ", "abc def ghi jkl mno pqr stu"}, + // Multiple spaces in middle + { + "abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu \r\n\t a", "abc def ghi jkl mno pqr stu  a"}, + }; + + for (const auto &c: cases) { 
SUBCASE(("unfold header " + c.second).c_str()) + { + auto *cpy = new char[c.first.size()]; + memcpy(cpy, c.first.data(), c.first.size()); + auto nlen = rspamd_message_header_unfold_inplace(cpy, c.first.size()); + CHECK(std::string{cpy, nlen} == c.second); + delete[] cpy; + } + } + } } #endif -- 2.39.5