diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-09-29 18:00:03 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-09-29 18:00:03 +0100 |
commit | d7fde715073a96dbcff7ecc69fe7b5ada3a1d045 (patch) | |
tree | d77d70891ab189c14eb7d983fcafde64689da13a /src/libmime/mime_string.hxx | |
parent | e3a6c03b42500262a4048dd32def12623cfb2619 (diff) | |
download | rspamd-d7fde715073a96dbcff7ecc69fe7b5ada3a1d045.tar.gz rspamd-d7fde715073a96dbcff7ecc69fe7b5ada3a1d045.zip |
[Project] Add helper library to handle mime strings in a safer manner
Diffstat (limited to 'src/libmime/mime_string.hxx')
-rw-r--r-- | src/libmime/mime_string.hxx | 292 |
1 files changed, 292 insertions, 0 deletions
diff --git a/src/libmime/mime_string.hxx b/src/libmime/mime_string.hxx new file mode 100644 index 000000000..4e25f6170 --- /dev/null +++ b/src/libmime/mime_string.hxx @@ -0,0 +1,292 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef RSPAMD_MIME_STRING_HXX +#define RSPAMD_MIME_STRING_HXX +#pragma once + +#include <string> +#include <string_view> +#include <memory> +#include <optional> +#include <cstdint> +#include <cstdlib> +#include <iosfwd> +#include "function2/function2.hpp" +#include "unicode/utf8.h" +#include "contrib/fastutf8/fastutf8.h" + +namespace rspamd { +/* + * The motivation for another string is to have utf8 valid string replacing + * all bad things with FFFFD replacement character and filtering \0 and other + * strange stuff defined by policies + * This string always exclude \0 characters and ignore them! 
This is how MUA acts, + * and we also store a flag about bad characters + */ +template<class T=char, class Allocator = std::allocator<T>> class basic_mime_string; + +using mime_string = basic_mime_string<char>; + +/* Helpers for type safe flags */ +enum class mime_string_flags : std::uint8_t { + MIME_STRING_DEFAULT = 0, + MIME_STRING_SEEN_ZEROES = 0x1 << 0, + MIME_STRING_SEEN_INVALID = 0x1 << 1, +}; + +mime_string_flags operator |(mime_string_flags lhs, mime_string_flags rhs) +{ + using ut = std::underlying_type<mime_string_flags>::type; + return static_cast<mime_string_flags>(static_cast<ut>(lhs) | static_cast<ut>(rhs)); +} + +mime_string_flags operator &(mime_string_flags lhs, mime_string_flags rhs) +{ + using ut = std::underlying_type<mime_string_flags>::type; + return static_cast<mime_string_flags>(static_cast<ut>(lhs) & static_cast<ut>(rhs)); +} + +bool operator !(mime_string_flags fl) +{ + return fl == mime_string_flags::MIME_STRING_DEFAULT; +} + +template<class T, class Allocator> +class basic_mime_string : private Allocator { +public: + using storage_type = std::basic_string<T, std::char_traits<T>, Allocator>; + using view_type = std::basic_string_view<T, std::char_traits<T>>; + using filter_type = fu2::function_view<UChar32 (UChar32)>; + /* Ctors */ + basic_mime_string() noexcept : Allocator() {} + explicit basic_mime_string(const Allocator& alloc) noexcept : Allocator(alloc) {} + + basic_mime_string(const T* str, std::size_t sz, const Allocator& alloc = Allocator()) noexcept : + Allocator(alloc) + { + append_c_string_unfiltered(str, sz); + } + + basic_mime_string(const storage_type &st, + const Allocator& alloc = Allocator()) noexcept : + basic_mime_string(st.data(), st.size(), alloc) {} + + basic_mime_string(const view_type &st, + const Allocator& alloc = Allocator()) noexcept : + basic_mime_string(st.data(), st.size(), alloc) {} + + basic_mime_string(const T* str, std::size_t sz, + filter_type &&filt, + const Allocator& alloc = Allocator()) noexcept : + 
Allocator(alloc), + filter_func(std::forward<filter_type>(filt)) + { + append_c_string_filtered(str, sz); + } + + basic_mime_string(const storage_type &st, + filter_type &&filt, + const Allocator& alloc = Allocator()) noexcept : + basic_mime_string(st.data(), st.size(), std::forward<filter_type>(filt), alloc) {} + basic_mime_string(const view_type &st, + filter_type &&filt, + const Allocator& alloc = Allocator()) noexcept : + basic_mime_string(st.data(), st.size(), std::forward<filter_type>(filt), alloc) {} + + auto size() const -> std::size_t { + return storage.size(); + } + + auto data() const -> const T* { + return storage.data(); + } + + constexpr auto has_zeroes() const -> bool { + return !!(flags & mime_string_flags::MIME_STRING_SEEN_ZEROES); + } + + constexpr auto has_invalid() const -> bool { + return !!(flags & mime_string_flags::MIME_STRING_SEEN_INVALID); + } + + /** + * Assign mime string from another string using move operation if a source string + * is utf8 valid. + * If this function returns false, then ownership has not been transferred + * and the `other` string is unmodified as well as the storage + * @param other + * @return + */ + [[nodiscard]] auto assign_if_valid(storage_type &&other) -> bool { + if (filter_func.has_value()) { + /* No way */ + return false; + } + if (rspamd_fast_utf8_validate((const unsigned char *)other.data(), other.size()) == 0) { + std::swap(storage, other); + + return true; + } + + return false; + } + + /** + * Copy to the internal storage discarding the contained value + * @param other + * @return + */ + auto assign_copy(const storage_type &other) { + storage.clear(); + + if (filter_func.has_value()) { + append_c_string_filtered(other.data(), other.size()); + } + else { + append_c_string_unfiltered(other.data(), other.size()); + } + } + + auto append(const T* str, std::size_t size) -> std::size_t { + if (filter_func.has_value()) { + return append_c_string_filtered(str, size); + } + else { + return 
append_c_string_unfiltered(str, size); + } + } + auto append(const storage_type &other) -> std::size_t { + return append(other.data(), other.size()); + } + auto append(const view_type &other) -> std::size_t { + return append(other.data(), other.size()); + } + + auto operator ==(const basic_mime_string &other) { + return other.storage == storage; + } + auto operator ==(const storage_type &other) { + return other == storage; + } + auto operator ==(const view_type &other) { + return other == storage; + } + auto operator ==(const T* other) { + if (other == NULL) { + return false; + } + auto olen = strlen(other); + if (storage.size() == olen) { + return memcmp(storage.data(), other, olen) == 0; + } + + return false; + } + + friend std::ostream& operator<< (std::ostream& os, const T& value) { + os << value.storage; + return os; + } +private: + mime_string_flags flags = mime_string_flags::MIME_STRING_DEFAULT; + storage_type storage; + std::optional<filter_type> filter_func; + + auto append_c_string_unfiltered(const T* str, std::size_t len) -> std::size_t { + /* This is fast path */ + const auto *p = str; + const auto *end = str + len; + std::ptrdiff_t err_offset; + auto orig_size = storage.size(); + + storage.reserve(len + storage.size()); + + if (memchr(str, 0, len) != NULL) { + /* Fallback to slow path */ + flags = flags | mime_string_flags::MIME_STRING_SEEN_ZEROES; + return append_c_string_filtered(str, len); + } + + while (p < end && len > 0 && + (err_offset = rspamd_fast_utf8_validate((const unsigned char *)p, len)) > 0) { + auto cur_offset = err_offset - 1; + storage.append(p, cur_offset); + + while (cur_offset < len) { + auto tmp = cur_offset; + UChar32 uc; + + U8_NEXT(p, cur_offset, len, uc); + + if (uc < 0) { + storage.append("\uFFFD"); + flags = flags | mime_string_flags::MIME_STRING_SEEN_INVALID; + } + else { + cur_offset = tmp; + break; + } + } + + p += cur_offset; + len = end - p; + } + + storage.append(p, len); + return storage.size() - orig_size; + } + + 
auto append_c_string_filtered(const T* str, std::size_t len) -> std::size_t { + std::ptrdiff_t i = 0, o = 0; + UChar32 uc; + char tmp[4]; + auto orig_size = storage.size(); + /* Slow path */ + + storage.reserve(len + storage.size()); + + while (i < len) { + U8_NEXT(str, i, len, uc); + + if (uc < 0) { + /* Replace with 0xFFFD */ + storage.append("\uFFFD"); + flags = flags | mime_string_flags::MIME_STRING_SEEN_INVALID; + } + else { + if (filter_func.has_value()) { + uc = filter_func.value()(uc); + } + + if (uc == 0) { + /* Special case, ignore it */ + flags = flags | mime_string_flags::MIME_STRING_SEEN_ZEROES; + } + else { + o = 0; + U8_APPEND_UNSAFE(tmp, o, uc); + storage.append(tmp, o); + } + } + } + + return storage.size() - orig_size; + } +}; + +} + +#endif //RSPAMD_MIME_STRING_HXX |