]> source.dussan.org Git - rspamd.git/commitdiff
[Test] Add unit tests for unfolding 4716/head
authorVsevolod Stakhov <vsevolod@rspamd.com>
Sat, 25 Nov 2023 13:42:03 +0000 (13:42 +0000)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Sat, 25 Nov 2023 13:42:03 +0000 (13:42 +0000)
src/libmime/mime_headers.c
src/libmime/mime_headers.h
test/rspamd_cxx_unit_utils.hxx

index 2b6b2acc02e4b36dff92a2a15949fcce184f8dff..e250e84f1363bf22e056471dfc4116adc2f7771f 100644 (file)
@@ -1025,6 +1025,76 @@ rspamd_message_headers_new(void)
        return nhdrs;
 }
 
+gsize rspamd_message_header_unfold_inplace(char *hdr, gsize len)
+{
+       /*
+        * Unfolds a header value inside its own buffer and returns the new length.
+        *
+        * A fold is a line break ("\r", "\n" or "\r\n") followed by at least one
+        * character accepted by g_ascii_isspace(); the line break together with
+        * the whole whitespace run after it is replaced by a single ' '.
+        * A line break that is not followed by whitespace is not a fold and is
+        * written back as one character: '\r' for a bare CR, '\n' otherwise
+        * (so a non-folding "\r\n" comes out as "\n").
+        * If the input ends while a line break or a fold is still pending,
+        * nothing is emitted for it, i.e. trailing line breaks/folds are dropped.
+        *
+        * The write cursor never gets ahead of the read cursor, so the output
+        * can safely overwrite the input. No terminating '\0' is written.
+        */
+       enum unfold_state {
+               st_plain,    /* ordinary characters are copied through */
+               st_after_cr, /* a '\r' has been consumed, nothing emitted yet */
+               st_after_lf, /* a '\n' (or "\r\n") has been consumed */
+               st_in_fold,  /* inside the whitespace run of a confirmed fold */
+       } st = st_plain;
+       char *dst = hdr;
+       const char *src = hdr;
+       const char *const limit = hdr + len;
+
+       while (src < limit) {
+               const char c = *src;
+
+               if (st == st_plain) {
+                       if (c == '\r') {
+                               st = st_after_cr;
+                               src++;
+                       }
+                       else if (c == '\n') {
+                               st = st_after_lf;
+                               src++;
+                       }
+                       else {
+                               *dst++ = c;
+                               src++;
+                       }
+               }
+               else if (st == st_after_cr) {
+                       if (c == '\n') {
+                               st = st_after_lf;
+                               src++;
+                       }
+                       else if (g_ascii_isspace(c)) {
+                               st = st_in_fold;
+                               src++;
+                       }
+                       else {
+                               /* Not a fold: restore the CR and re-examine this character as plain text */
+                               *dst++ = '\r';
+                               st = st_plain;
+                       }
+               }
+               else if (st == st_after_lf) {
+                       if (g_ascii_isspace(c)) {
+                               st = st_in_fold;
+                               src++;
+                       }
+                       else {
+                               /* Not a fold: restore the LF and re-examine this character as plain text */
+                               *dst++ = '\n';
+                               st = st_plain;
+                       }
+               }
+               else {
+                       /* st_in_fold */
+                       if (g_ascii_isspace(c)) {
+                               /* Still inside the folding whitespace, swallow it */
+                               src++;
+                       }
+                       else {
+                               /* Fold is over: it collapses into exactly one space */
+                               *dst++ = ' ';
+                               st = st_plain;
+                       }
+               }
+       }
+
+       return dst - hdr;
+}
+
 void rspamd_message_set_modified_header(struct rspamd_task *task,
                                                                                struct rspamd_mime_headers_table *hdrs,
                                                                                const gchar *hdr_name,
@@ -1201,8 +1271,10 @@ void rspamd_message_set_modified_header(struct rspamd_task *task,
                                        nhdr->name = hdr_elt->name;
                                        nhdr->value = rspamd_mempool_alloc(task->task_pool,
                                                                                                           raw_len + 1);
+                                       /* Strlcpy will ensure that value will have no embedded \0 */
                                        rspamd_strlcpy(nhdr->value, raw_value, raw_len + 1);
-                                       /* TODO: unfold header value, sigh */
+                                       gsize value_len = rspamd_message_header_unfold_inplace(nhdr->value, raw_len);
+                                       nhdr->value[value_len] = '\0';
 
                                        /* Deal with the raw value */
                                        size_t namelen = strlen(hdr_elt->name);
index ffa863e8bb2ad87ab12b4e02b9e312b489628805..60015a20ed72996ba5f5b8a8e38a41a151703471 100644 (file)
@@ -185,6 +185,14 @@ bool rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *,
  */
 gsize rspamd_strip_smtp_comments_inplace(gchar *input, gsize len);
 
+/**
+ * Unfold header in place: every folding sequence (a line break followed by
+ * whitespace) is collapsed into a single space and the result is written
+ * over the beginning of the same buffer.
+ * A line break or fold that is still pending when the input ends is dropped.
+ * The output is NOT NUL-terminated by this function; the caller has to
+ * terminate it using the returned length if a C string is needed.
+ * @param hdr header value (modified in place), at least `len` readable bytes
+ * @param len length of the header
+ * @return new unfolded length (never greater than `len`)
+ */
+gsize rspamd_message_header_unfold_inplace(char *hdr, gsize len);
+
 #ifdef __cplusplus
 }
 #endif
index 3e53a6d33a9f7f2810bcd326bcc1d42a93cd1a7b..126253fd664cae608e3cb607e61abe22974fb661 100644 (file)
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2021 Vsevolod Stakhov
+/*
+ * Copyright 2023 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *   http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -159,6 +159,51 @@ TEST_SUITE("rspamd_utils")
                        rspamd_fstring_free(fstr);
                }
        }
+
+       TEST_CASE("rspamd_message_header_unfold_inplace")
+       {
+               std::vector<std::pair<std::string, std::string>> cases{
+                       {"abc", "abc"},
+                       {"abc\r\n def", "abc def"},
+                       {"abc\r\n\tdef", "abc def"},
+                       {"abc\r\n\tdef\r\n\tghi", "abc def ghi"},
+                       {"abc\r\n\tdef\r\n\tghi\r\n", "abc def ghi"},
+                       {"abc\r\n\tdef\r\n\tghi\r\n\t", "abc def ghi"},
+                       {"abc\r\n\tdef\r\n\tghi\r\n\tjkl", "abc def ghi jkl"},
+                       {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n", "abc def ghi jkl"},
+                       {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\t", "abc def ghi jkl"},
+                       {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno", "abc def ghi jkl mno"},
+                       {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n", "abc def ghi jkl mno"},
+                       {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\t", "abc def ghi jkl mno"},
+                       {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr", "abc def ghi jkl mno pqr"},
+                       {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n", "abc def ghi jkl mno pqr"},
+                       {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\t", "abc def ghi jkl mno pqr"},
+                       {"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu", "abc def ghi jkl mno pqr stu"},
+                       // Newline at the end
+                       {
+                               "abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu\r\n", "abc def ghi jkl mno pqr stu"},
+                       // Spaces at the end
+                       {
+                               "abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu\r\n\t", "abc def ghi jkl mno pqr stu"},
+                       // Multiple spaces at the end
+                       {
+                               "abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu\r\n\t   ", "abc def ghi jkl mno pqr stu"},
+                       // Multiple spaces in middle
+                       {
+                               "abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu   \r\n\t   a", "abc def ghi jkl mno pqr stu    a"},
+               };
+
+               for (const auto &c: cases) {
+                       SUBCASE(("unfold header " + c.second).c_str())
+                       {
+                               auto *cpy = new char[c.first.size()];
+                               memcpy(cpy, c.first.data(), c.first.size());
+                               auto nlen = rspamd_message_header_unfold_inplace(cpy, c.first.size());
+                               CHECK(std::string{cpy, nlen} == c.second);
+                               delete[] cpy;
+                       }
+               }
+       }
 }
 
 #endif