]> source.dussan.org Git - rspamd.git/commitdiff
[Project] Add helper library to handle mime strings in a safer manner
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 29 Sep 2021 17:00:03 +0000 (18:00 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 29 Sep 2021 17:00:03 +0000 (18:00 +0100)
src/libmime/CMakeLists.txt
src/libmime/mime_string.cxx [new file with mode: 0644]
src/libmime/mime_string.hxx [new file with mode: 0644]

index a011dff071057f84a7424323e4a1e3f18db6899b..878ac814908c4d816a3539e79a9024b75a4cd963 100644 (file)
@@ -10,6 +10,7 @@ SET(LIBRSPAMDMIMESRC
                                ${CMAKE_CURRENT_SOURCE_DIR}/mime_headers.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/mime_parser.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/mime_encoding.c
-                               ${CMAKE_CURRENT_SOURCE_DIR}/lang_detection.c)
+                               ${CMAKE_CURRENT_SOURCE_DIR}/lang_detection.c
+               ${CMAKE_CURRENT_SOURCE_DIR}/mime_string.cxx)
 
 SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE)
\ No newline at end of file
diff --git a/src/libmime/mime_string.cxx b/src/libmime/mime_string.cxx
new file mode 100644 (file)
index 0000000..1785e91
--- /dev/null
@@ -0,0 +1,99 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+#include "mime_string.hxx"
+#include "unicode/uchar.h"
+
+TEST_SUITE("mime_string") {
+/* Construction without a filter: zero bytes and broken UTF-8 are the only things handled */
+TEST_CASE("mime_string unfiltered ctors")
+{
+       SUBCASE("empty") {
+               rspamd::mime_string empty_str;
+               CHECK(empty_str.size() == 0);
+               CHECK(empty_str == "");
+       }
+       SUBCASE("unfiltered valid") {
+               rspamd::mime_string plain{std::string_view("abcd")};
+               CHECK(plain == "abcd");
+       }
+       SUBCASE("unfiltered zero character") {
+               /* The embedded \0 must be dropped and reported */
+               rspamd::mime_string with_zero{"abc\0d", 5};
+               CHECK(with_zero.has_zeroes());
+               CHECK(with_zero == "abcd");
+       }
+       SUBCASE("unfiltered invalid character - middle") {
+               rspamd::mime_string broken{std::string("abc\234d")};
+               CHECK(broken.has_invalid());
+               CHECK(broken == "abc\uFFFDd");
+       }
+       SUBCASE("unfiltered invalid character - end") {
+               rspamd::mime_string broken{std::string("abc\234")};
+               CHECK(broken.has_invalid());
+               CHECK(broken == "abc\uFFFD");
+       }
+       SUBCASE("unfiltered invalid character - start") {
+               rspamd::mime_string broken{std::string("\234abc")};
+               CHECK(broken.has_invalid());
+               CHECK(broken == "\uFFFDabc");
+       }
+}
+
+/* Construction with a per code point filter */
+TEST_CASE("mime_string filtered ctors")
+{
+       /* Lowercases every code point */
+       auto tolower_filter = [](UChar32 inp) -> UChar32 {
+               return u_tolower(inp);
+       };
+
+       /* Maps every non-printable code point to 0, which asks the string to drop it */
+       auto print_filter = [](UChar32 inp) -> UChar32 {
+               return u_isprint(inp) ? inp : 0;
+       };
+
+       SUBCASE("empty") {
+               rspamd::mime_string empty_str{std::string_view(""), tolower_filter};
+               CHECK(empty_str.size() == 0);
+               CHECK(empty_str == "");
+       }
+       SUBCASE("filtered valid") {
+               rspamd::mime_string lowered{std::string("AbCdУ"), tolower_filter};
+               CHECK(lowered == "abcdу");
+       }
+       SUBCASE("filtered invalid + filtered") {
+               /* \234 is replaced with U+FFFD, \1 is removed by the filter */
+               rspamd::mime_string cleaned{std::string("abcd\234\1"), print_filter};
+               CHECK(cleaned == "abcd\uFFFD");
+       }
+}
+/* Move-assignment is accepted only for input that is already valid UTF-8 */
+TEST_CASE("mime_string assign")
+{
+       SUBCASE("assign from valid") {
+               rspamd::mime_string target;
+
+               CHECK(target.assign_if_valid(std::string("test")));
+               CHECK(target == "test");
+       }
+       SUBCASE("assign from invalid") {
+               rspamd::mime_string target;
+
+               CHECK_FALSE(target.assign_if_valid(std::string("test\234t")));
+               CHECK(target == "");
+       }
+}
+}
\ No newline at end of file
diff --git a/src/libmime/mime_string.hxx b/src/libmime/mime_string.hxx
new file mode 100644 (file)
index 0000000..4e25f61
--- /dev/null
@@ -0,0 +1,292 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_MIME_STRING_HXX
+#define RSPAMD_MIME_STRING_HXX
+#pragma once
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iosfwd>
+#include <memory>
+#include <optional>
+#include <ostream>
+#include <string>
+#include <string_view>
+#include <type_traits>
+#include <utility>
+#include "function2/function2.hpp"
+#include "unicode/utf8.h"
+#include "contrib/fastutf8/fastutf8.h"
+
+namespace rspamd {
+/*
+ * The motivation for yet another string type is to have a string that is always
+ * valid UTF-8: all invalid sequences are replaced with the U+FFFD replacement
+ * character, while \0 and other strange stuff defined by policies is filtered out.
+ * This string always excludes \0 characters and ignores them! This is how MUAs act,
+ * and we also store flags recording that bad characters have been seen.
+ */
+template<class T=char, class Allocator = std::allocator<T>> class basic_mime_string;
+
+using mime_string = basic_mime_string<char>;
+
+/* Helpers for type safe flags */
+enum class mime_string_flags : std::uint8_t {
+       /* No flags set */
+       MIME_STRING_DEFAULT = 0,
+       /* Set when the input contained \0 or a filter mapped a character to 0 */
+       MIME_STRING_SEEN_ZEROES = 0x1 << 0,
+       /* Set when an invalid UTF-8 sequence has been replaced with U+FFFD */
+       MIME_STRING_SEEN_INVALID = 0x1 << 1,
+};
+
+/**
+ * Bitwise OR of two flag sets.
+ * Declared constexpr (and therefore implicitly inline): the function is defined
+ * in a header, so a plain definition would be emitted by every translation
+ * unit including it and break the one definition rule at link time.
+ * @param lhs first flag set
+ * @param rhs second flag set
+ * @return flags that are set in either operand
+ */
+constexpr mime_string_flags operator |(mime_string_flags lhs, mime_string_flags rhs)
+{
+       using ut = std::underlying_type<mime_string_flags>::type;
+       return static_cast<mime_string_flags>(static_cast<ut>(lhs) | static_cast<ut>(rhs));
+}
+
+/**
+ * Bitwise AND of two flag sets.
+ * Declared constexpr (and therefore implicitly inline): the function is defined
+ * in a header, so a plain definition would be emitted by every translation
+ * unit including it and break the one definition rule at link time.
+ * @param lhs first flag set
+ * @param rhs second flag set
+ * @return flags that are set in both operands
+ */
+constexpr mime_string_flags operator &(mime_string_flags lhs, mime_string_flags rhs)
+{
+       using ut = std::underlying_type<mime_string_flags>::type;
+       return static_cast<mime_string_flags>(static_cast<ut>(lhs) & static_cast<ut>(rhs));
+}
+
+/**
+ * Tests whether a flag set is empty.
+ * Declared constexpr (and therefore implicitly inline) as it is defined in a
+ * header that may be included by several translation units.
+ * @param fl flag set to test
+ * @return true if no flag is set, i.e. fl equals MIME_STRING_DEFAULT
+ */
+constexpr bool operator !(mime_string_flags fl)
+{
+       return fl == mime_string_flags::MIME_STRING_DEFAULT;
+}
+
+/**
+ * UTF-8 string container used for mime data.
+ * Everything added through the constructors, append() or assign_copy() is
+ * sanitised: invalid UTF-8 sequences are replaced with U+FFFD, \0 characters
+ * are dropped and, if a filter was supplied at construction time, each code
+ * point is passed through it (a filter result of 0 drops the character).
+ * What has been seen so far is recorded in flags that are available via
+ * has_zeroes() and has_invalid().
+ * NOTE(review): filter_type is fu2::function_view, which per the function2
+ * documentation does not own the callable - the callable presumably has to
+ * outlive the string; confirm against callers.
+ */
+template<class T, class Allocator>
+class basic_mime_string : private Allocator {
+public:
+       using storage_type = std::basic_string<T, std::char_traits<T>, Allocator>;
+       using view_type = std::basic_string_view<T, std::char_traits<T>>;
+       /* Maps an input code point to an output code point, 0 means `drop it` */
+       using filter_type = fu2::function_view<UChar32 (UChar32)>;
+
+       /* Ctors */
+       basic_mime_string() noexcept : Allocator() {}
+       /* The allocator is also handed to the storage, otherwise a caller supplied one would be ignored */
+       explicit basic_mime_string(const Allocator& alloc) noexcept :
+                       Allocator(alloc), storage(alloc) {}
+
+       /**
+        * Builds a string from `sz` characters at `str` without a filter
+        */
+       basic_mime_string(const T* str, std::size_t sz, const Allocator& alloc = Allocator()) noexcept :
+                       Allocator(alloc), storage(alloc)
+       {
+               append_c_string_unfiltered(str, sz);
+       }
+
+       basic_mime_string(const storage_type &st,
+                                         const Allocator& alloc = Allocator()) noexcept :
+                       basic_mime_string(st.data(), st.size(), alloc) {}
+
+       basic_mime_string(const view_type &st,
+                                         const Allocator& alloc = Allocator()) noexcept :
+                       basic_mime_string(st.data(), st.size(), alloc) {}
+
+       /**
+        * Builds a string from `sz` characters at `str`, passing every decoded
+        * code point through `filt`; the filter is kept for subsequent appends
+        */
+       basic_mime_string(const T* str, std::size_t sz,
+                                         filter_type &&filt,
+                                         const Allocator& alloc = Allocator()) noexcept :
+                       Allocator(alloc),
+                       storage(alloc),
+                       filter_func(std::move(filt))
+       {
+               append_c_string_filtered(str, sz);
+       }
+
+       basic_mime_string(const storage_type &st,
+                                         filter_type &&filt,
+                                         const Allocator& alloc = Allocator()) noexcept :
+                       basic_mime_string(st.data(), st.size(), std::move(filt), alloc) {}
+       basic_mime_string(const view_type &st,
+                                         filter_type &&filt,
+                                         const Allocator& alloc = Allocator()) noexcept :
+                       basic_mime_string(st.data(), st.size(), std::move(filt), alloc) {}
+
+       /* Number of stored characters (code units, not code points) */
+       auto size() const -> std::size_t {
+               return storage.size();
+       }
+
+       /* Pointer to the stored characters */
+       auto data() const -> const T* {
+               return storage.data();
+       }
+
+       /* True if a \0 (or a character mapped to 0 by the filter) has been dropped */
+       constexpr auto has_zeroes() const -> bool {
+               return !!(flags & mime_string_flags::MIME_STRING_SEEN_ZEROES);
+       }
+
+       /* True if an invalid UTF-8 sequence has been replaced with U+FFFD */
+       constexpr auto has_invalid() const -> bool {
+               return !!(flags & mime_string_flags::MIME_STRING_SEEN_INVALID);
+       }
+
+       /**
+        * Assign mime string from another string using move operation if a source string
+        * is utf8 valid.
+        * If this function returns false, then ownership has not been transferred
+        * and the `other` string is unmodified as well as the storage.
+        * If it returns true, the contents are swapped, so `other` holds whatever
+        * this string contained before the call.
+        * Strings constructed with a filter always refuse this operation.
+        * @param other string to take the contents from
+        * @return true if the contents of `other` have been taken
+        */
+       [[nodiscard]] auto assign_if_valid(storage_type &&other) -> bool {
+               if (filter_func.has_value()) {
+                       /* No way: the filter has to see every character */
+                       return false;
+               }
+
+               const auto *raw = reinterpret_cast<const unsigned char *>(other.data());
+
+               if (rspamd_fast_utf8_validate(raw, other.size()) != 0) {
+                       return false;
+               }
+
+               std::swap(storage, other);
+
+               return true;
+       }
+
+       /**
+        * Copy to the internal storage discarding the contained value; the input
+        * is sanitised in the same way as in append()
+        * NOTE(review): flags recorded for the previous content are not reset here
+        * @param other string to copy from
+        */
+       auto assign_copy(const storage_type &other) {
+               storage.clear();
+               append(other.data(), other.size());
+       }
+
+       /**
+        * Appends `size` characters at `str`, using the filter if there is one
+        * @return number of characters actually added to the storage
+        */
+       auto append(const T* str, std::size_t size) -> std::size_t {
+               if (filter_func.has_value()) {
+                       return append_c_string_filtered(str, size);
+               }
+
+               return append_c_string_unfiltered(str, size);
+       }
+       auto append(const storage_type &other) -> std::size_t {
+               return append(other.data(), other.size());
+       }
+       auto append(const view_type &other) -> std::size_t {
+               return append(other.data(), other.size());
+       }
+
+       /* Comparisons look at the stored characters only; flags and filter are ignored */
+       auto operator ==(const basic_mime_string &other) const -> bool {
+               return other.storage == storage;
+       }
+       auto operator ==(const storage_type &other) const -> bool {
+               return other == storage;
+       }
+       auto operator ==(const view_type &other) const -> bool {
+               return other == storage;
+       }
+       /* A null pointer never compares equal, not even to an empty string */
+       auto operator ==(const T* other) const -> bool {
+               if (other == nullptr) {
+                       return false;
+               }
+
+               return view_type{storage.data(), storage.size()} == view_type{other};
+       }
+
+       /* Writes the stored characters to the stream */
+       friend std::ostream& operator<< (std::ostream& os, const basic_mime_string& value) {
+               os << value.storage;
+               return os;
+       }
+private:
+       mime_string_flags flags = mime_string_flags::MIME_STRING_DEFAULT;
+       storage_type storage;
+       std::optional<filter_type> filter_func;
+
+       /**
+        * Appends U+FFFD and remembers that invalid input has been seen.
+        * The character is spelled as explicit UTF-8 bytes so that the result does
+        * not depend on the execution character set chosen by the compiler.
+        */
+       auto append_replacement_char() -> void {
+               storage.append("\xEF\xBF\xBD");
+               flags = flags | mime_string_flags::MIME_STRING_SEEN_INVALID;
+       }
+
+       /**
+        * Appends input when no filter is set: valid runs are copied in bulk and
+        * only broken sequences are decoded one by one. Input with \0 inside is
+        * delegated to append_c_string_filtered.
+        * @return number of characters added to the storage
+        */
+       auto append_c_string_unfiltered(const T* str, std::size_t len) -> std::size_t {
+               if (len == 0) {
+                       /* Nothing to add; also keeps memchr away from a possibly null pointer */
+                       return 0;
+               }
+
+               const auto orig_size = storage.size();
+
+               storage.reserve(len + orig_size);
+
+               if (std::memchr(str, 0, len) != nullptr) {
+                       /* Fallback to slow path */
+                       flags = flags | mime_string_flags::MIME_STRING_SEEN_ZEROES;
+                       return append_c_string_filtered(str, len);
+               }
+
+               /* This is fast path */
+               const auto *p = str;
+               const auto *end = str + len;
+               std::ptrdiff_t err_offset;
+
+               while (p < end && len > 0 &&
+                       (err_offset = rspamd_fast_utf8_validate(
+                                       reinterpret_cast<const unsigned char *>(p), len)) > 0) {
+                       /* err_offset is used as 1-based position of the first bad byte: copy the good prefix */
+                       auto cur_offset = err_offset - 1;
+                       const auto remain = static_cast<std::ptrdiff_t>(len);
+
+                       storage.append(p, cur_offset);
+
+                       /* Replace consecutive broken sequences, stop before the first decodable character */
+                       while (cur_offset < remain) {
+                               const auto saved_offset = cur_offset;
+                               UChar32 uc;
+
+                               U8_NEXT(p, cur_offset, remain, uc);
+
+                               if (uc < 0) {
+                                       append_replacement_char();
+                               }
+                               else {
+                                       /* Valid character: rewind so that it is validated with the rest */
+                                       cur_offset = saved_offset;
+                                       break;
+                               }
+                       }
+
+                       p += cur_offset;
+                       len = end - p;
+               }
+
+               /* Whatever is left has passed validation */
+               storage.append(p, len);
+
+               return storage.size() - orig_size;
+       }
+
+       /**
+        * Appends input decoding it code point by code point: broken sequences
+        * become U+FFFD, valid code points go through the filter (if any) and
+        * code points equal to 0 are dropped.
+        * @return number of characters added to the storage
+        */
+       auto append_c_string_filtered(const T* str, std::size_t len) -> std::size_t {
+               if (len == 0) {
+                       return 0;
+               }
+
+               const auto orig_size = storage.size();
+               const auto slen = static_cast<std::ptrdiff_t>(len);
+               std::ptrdiff_t i = 0;
+               /* Slow path */
+
+               storage.reserve(len + orig_size);
+
+               while (i < slen) {
+                       UChar32 uc;
+
+                       U8_NEXT(str, i, slen, uc);
+
+                       if (uc < 0) {
+                               /* Replace with 0xFFFD */
+                               append_replacement_char();
+                               continue;
+                       }
+
+                       if (filter_func.has_value()) {
+                               uc = (*filter_func)(uc);
+                       }
+
+                       if (uc == 0) {
+                               /* Special case, ignore it */
+                               flags = flags | mime_string_flags::MIME_STRING_SEEN_ZEROES;
+                               continue;
+                       }
+
+                       /* A code point takes at most 4 bytes in UTF-8 */
+                       char tmp[4];
+                       std::ptrdiff_t o = 0;
+
+                       U8_APPEND_UNSAFE(tmp, o, uc);
+                       storage.append(tmp, o);
+               }
+
+               return storage.size() - orig_size;
+       }
+};
+
+}
+
+#endif //RSPAMD_MIME_STRING_HXX