Explorar el Código

[Project] Add helper library to handle mime strings in a safer manner

tags/3.1
Vsevolod Stakhov hace 2 años
padre
commit
d7fde71507
Se han modificado 3 ficheros con 393 adiciones y 1 borrados
  1. 2
    1
      src/libmime/CMakeLists.txt
  2. 99
    0
      src/libmime/mime_string.cxx
  3. 292
    0
      src/libmime/mime_string.hxx

+ 2
- 1
src/libmime/CMakeLists.txt Ver fichero

@@ -10,6 +10,7 @@ SET(LIBRSPAMDMIMESRC
${CMAKE_CURRENT_SOURCE_DIR}/mime_headers.c
${CMAKE_CURRENT_SOURCE_DIR}/mime_parser.c
${CMAKE_CURRENT_SOURCE_DIR}/mime_encoding.c
${CMAKE_CURRENT_SOURCE_DIR}/lang_detection.c)
${CMAKE_CURRENT_SOURCE_DIR}/lang_detection.c
${CMAKE_CURRENT_SOURCE_DIR}/mime_string.cxx)

SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE)

+ 99
- 0
src/libmime/mime_string.cxx Ver fichero

@@ -0,0 +1,99 @@
/*-
* Copyright 2021 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
#include "doctest/doctest.h"
#include "mime_string.hxx"
#include "unicode/uchar.h"

TEST_SUITE("mime_string") {
/*
 * Constructors that take no filter: the checks below expect \0 bytes to be
 * dropped and bytes that are not valid UTF-8 to be replaced with U+FFFD,
 * with the matching has_zeroes()/has_invalid() flag raised.
 */
TEST_CASE("mime_string unfiltered ctors")
{
/* Default constructed string is empty */
SUBCASE("empty") {
rspamd::mime_string st;
CHECK(st.size() == 0);
CHECK(st == "");
}
/* Plain ASCII input is stored unchanged */
SUBCASE("unfiltered valid") {
rspamd::mime_string st{std::string_view("abcd")};
CHECK(st == "abcd");
}
/* Explicit length of 5 is needed so the embedded \0 is part of the input */
SUBCASE("unfiltered zero character") {
rspamd::mime_string st{"abc\0d", 5};
CHECK(st.has_zeroes());
CHECK(st == "abcd");
}
/* \234 (0x9C) is a lone UTF-8 continuation byte, i.e. an invalid sequence */
SUBCASE("unfiltered invalid character - middle") {
rspamd::mime_string st{std::string("abc\234d")};
CHECK(st.has_invalid());
CHECK(st == "abc\uFFFDd");
}
/* Invalid byte as the very last byte of the input */
SUBCASE("unfiltered invalid character - end") {
rspamd::mime_string st{std::string("abc\234")};
CHECK(st.has_invalid());
CHECK(st == "abc\uFFFD");
}
/* Invalid byte as the very first byte of the input */
SUBCASE("unfiltered invalid character - start") {
rspamd::mime_string st{std::string("\234abc")};
CHECK(st.has_invalid());
CHECK(st == "\uFFFDabc");
}
}

/*
 * Constructors that take a per code point filter: the filter result replaces
 * the decoded code point, and a result of 0 removes the code point.
 */
TEST_CASE("mime_string filtered ctors")
{
/* Maps every non-printable code point to 0 so that it is dropped */
auto print_filter = [](UChar32 inp) -> UChar32 {
if (!u_isprint(inp)) {
return 0;
}

return inp;
};

/* Lowercases every code point using ICU */
auto tolower_filter = [](UChar32 inp) -> UChar32 {
return u_tolower(inp);
};

/* Empty input stays empty regardless of the filter */
SUBCASE("empty") {
rspamd::mime_string st{std::string_view(""), tolower_filter};
CHECK(st.size() == 0);
CHECK(st == "");
}
/* The filter is applied to ASCII and non-ASCII (Cyrillic) letters alike */
SUBCASE("filtered valid") {
rspamd::mime_string st{std::string("AbCdУ"), tolower_filter};
CHECK(st == "abcdу");
}
/* \234 is invalid UTF-8 (replaced by U+FFFD), \1 is not printable (dropped) */
SUBCASE("filtered invalid + filtered") {
rspamd::mime_string st{std::string("abcd\234\1"), print_filter};
CHECK(st == "abcd\uFFFD");
}
}
/*
 * assign_if_valid() takes over the source string only when it is valid UTF-8;
 * otherwise it returns false and the mime string keeps its previous content.
 */
TEST_CASE("mime_string assign")
{
/* Valid UTF-8 is accepted and becomes the new content */
SUBCASE("assign from valid") {
rspamd::mime_string st;

CHECK(st.assign_if_valid(std::string("test")));
CHECK(st == "test");
}
/* \234 makes the source invalid, so the (empty) target is left untouched */
SUBCASE("assign from invalid") {
rspamd::mime_string st;

CHECK(!st.assign_if_valid(std::string("test\234t")));
CHECK(st == "");
}
}
}

+ 292
- 0
src/libmime/mime_string.hxx Ver fichero

@@ -0,0 +1,292 @@
/*-
* Copyright 2021 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef RSPAMD_MIME_STRING_HXX
#define RSPAMD_MIME_STRING_HXX
#pragma once

#include <string>
#include <string_view>
#include <memory>
#include <optional>
#include <cstdint>
#include <cstdlib>
#include <iosfwd>
#include "function2/function2.hpp"
#include "unicode/utf8.h"
#include "contrib/fastutf8/fastutf8.h"

namespace rspamd {
/*
* The motivation for another string type is to have a string that is always
* valid UTF-8: all bad sequences are replaced with the U+FFFD replacement
* character, and \0 and other strange stuff defined by policies is filtered out.
* This string always excludes \0 characters and ignores them! This is how MUAs act,
* and we also store a flag about bad characters
*/
template<class T=char, class Allocator = std::allocator<T>> class basic_mime_string;

using mime_string = basic_mime_string<char>;

/* Helpers for type safe flags */
/**
 * Bit flags describing what has been encountered while filling a mime string.
 * The values are single bits so that they can be combined with the operators
 * defined below.
 */
enum class mime_string_flags : std::uint8_t {
	MIME_STRING_DEFAULT = 0, /* no flags set */
	MIME_STRING_SEEN_ZEROES = 0x1 << 0,
	MIME_STRING_SEEN_INVALID = 0x1 << 1,
};

/*
 * The operators are `constexpr`, which also makes them implicitly inline:
 * this header can be included from several translation units without
 * producing multiple definitions, and the operators are usable from
 * constexpr code.
 */

/**
 * Bitwise union of two flag sets
 * @param lhs first set
 * @param rhs second set
 * @return flags that are set in either `lhs` or `rhs`
 */
constexpr mime_string_flags operator |(mime_string_flags lhs, mime_string_flags rhs) noexcept
{
	using ut = std::underlying_type<mime_string_flags>::type;
	return static_cast<mime_string_flags>(static_cast<ut>(lhs) | static_cast<ut>(rhs));
}

/**
 * Bitwise intersection of two flag sets
 * @param lhs first set
 * @param rhs second set
 * @return flags that are set in both `lhs` and `rhs`
 */
constexpr mime_string_flags operator &(mime_string_flags lhs, mime_string_flags rhs) noexcept
{
	using ut = std::underlying_type<mime_string_flags>::type;
	return static_cast<mime_string_flags>(static_cast<ut>(lhs) & static_cast<ut>(rhs));
}

/**
 * Emptiness test, allows writing `!!(flags & some_flag)`
 * @param fl flag set to test
 * @return true if no flag is set in `fl`
 */
constexpr bool operator !(mime_string_flags fl) noexcept
{
	return fl == mime_string_flags::MIME_STRING_DEFAULT;
}

template<class T, class Allocator>
class basic_mime_string : private Allocator {
public:
using storage_type = std::basic_string<T, std::char_traits<T>, Allocator>;
using view_type = std::basic_string_view<T, std::char_traits<T>>;
using filter_type = fu2::function_view<UChar32 (UChar32)>;
/* Ctors */
basic_mime_string() noexcept : Allocator() {}
explicit basic_mime_string(const Allocator& alloc) noexcept : Allocator(alloc) {}

basic_mime_string(const T* str, std::size_t sz, const Allocator& alloc = Allocator()) noexcept :
Allocator(alloc)
{
append_c_string_unfiltered(str, sz);
}

basic_mime_string(const storage_type &st,
const Allocator& alloc = Allocator()) noexcept :
basic_mime_string(st.data(), st.size(), alloc) {}

basic_mime_string(const view_type &st,
const Allocator& alloc = Allocator()) noexcept :
basic_mime_string(st.data(), st.size(), alloc) {}

basic_mime_string(const T* str, std::size_t sz,
filter_type &&filt,
const Allocator& alloc = Allocator()) noexcept :
Allocator(alloc),
filter_func(std::forward<filter_type>(filt))
{
append_c_string_filtered(str, sz);
}

basic_mime_string(const storage_type &st,
filter_type &&filt,
const Allocator& alloc = Allocator()) noexcept :
basic_mime_string(st.data(), st.size(), std::forward<filter_type>(filt), alloc) {}
basic_mime_string(const view_type &st,
filter_type &&filt,
const Allocator& alloc = Allocator()) noexcept :
basic_mime_string(st.data(), st.size(), std::forward<filter_type>(filt), alloc) {}

auto size() const -> std::size_t {
return storage.size();
}

auto data() const -> const T* {
return storage.data();
}

constexpr auto has_zeroes() const -> bool {
return !!(flags & mime_string_flags::MIME_STRING_SEEN_ZEROES);
}

constexpr auto has_invalid() const -> bool {
return !!(flags & mime_string_flags::MIME_STRING_SEEN_INVALID);
}

/**
* Assign mime string from another string using move operation if a source string
* is utf8 valid.
* If this function returns false, then ownership has not been transferred
* and the `other` string is unmodified as well as the storage
* @param other
* @return
*/
[[nodiscard]] auto assign_if_valid(storage_type &&other) -> bool {
if (filter_func.has_value()) {
/* No way */
return false;
}
if (rspamd_fast_utf8_validate((const unsigned char *)other.data(), other.size()) == 0) {
std::swap(storage, other);

return true;
}

return false;
}

/**
* Copy to the internal storage discarding the contained value
* @param other
* @return
*/
auto assign_copy(const storage_type &other) {
storage.clear();

if (filter_func.has_value()) {
append_c_string_filtered(other.data(), other.size());
}
else {
append_c_string_unfiltered(other.data(), other.size());
}
}

auto append(const T* str, std::size_t size) -> std::size_t {
if (filter_func.has_value()) {
return append_c_string_filtered(str, size);
}
else {
return append_c_string_unfiltered(str, size);
}
}
auto append(const storage_type &other) -> std::size_t {
return append(other.data(), other.size());
}
auto append(const view_type &other) -> std::size_t {
return append(other.data(), other.size());
}

auto operator ==(const basic_mime_string &other) {
return other.storage == storage;
}
auto operator ==(const storage_type &other) {
return other == storage;
}
auto operator ==(const view_type &other) {
return other == storage;
}
auto operator ==(const T* other) {
if (other == NULL) {
return false;
}
auto olen = strlen(other);
if (storage.size() == olen) {
return memcmp(storage.data(), other, olen) == 0;
}

return false;
}

friend std::ostream& operator<< (std::ostream& os, const T& value) {
os << value.storage;
return os;
}
private:
mime_string_flags flags = mime_string_flags::MIME_STRING_DEFAULT;
storage_type storage;
std::optional<filter_type> filter_func;

auto append_c_string_unfiltered(const T* str, std::size_t len) -> std::size_t {
/* This is fast path */
const auto *p = str;
const auto *end = str + len;
std::ptrdiff_t err_offset;
auto orig_size = storage.size();

storage.reserve(len + storage.size());

if (memchr(str, 0, len) != NULL) {
/* Fallback to slow path */
flags = flags | mime_string_flags::MIME_STRING_SEEN_ZEROES;
return append_c_string_filtered(str, len);
}

while (p < end && len > 0 &&
(err_offset = rspamd_fast_utf8_validate((const unsigned char *)p, len)) > 0) {
auto cur_offset = err_offset - 1;
storage.append(p, cur_offset);

while (cur_offset < len) {
auto tmp = cur_offset;
UChar32 uc;

U8_NEXT(p, cur_offset, len, uc);

if (uc < 0) {
storage.append("\uFFFD");
flags = flags | mime_string_flags::MIME_STRING_SEEN_INVALID;
}
else {
cur_offset = tmp;
break;
}
}

p += cur_offset;
len = end - p;
}

storage.append(p, len);
return storage.size() - orig_size;
}

auto append_c_string_filtered(const T* str, std::size_t len) -> std::size_t {
std::ptrdiff_t i = 0, o = 0;
UChar32 uc;
char tmp[4];
auto orig_size = storage.size();
/* Slow path */

storage.reserve(len + storage.size());

while (i < len) {
U8_NEXT(str, i, len, uc);

if (uc < 0) {
/* Replace with 0xFFFD */
storage.append("\uFFFD");
flags = flags | mime_string_flags::MIME_STRING_SEEN_INVALID;
}
else {
if (filter_func.has_value()) {
uc = filter_func.value()(uc);
}

if (uc == 0) {
/* Special case, ignore it */
flags = flags | mime_string_flags::MIME_STRING_SEEN_ZEROES;
}
else {
o = 0;
U8_APPEND_UNSAFE(tmp, o, uc);
storage.append(tmp, o);
}
}
}

return storage.size() - orig_size;
}
};

}

#endif //RSPAMD_MIME_STRING_HXX

Cargando…
Cancelar
Guardar