From: Vsevolod Stakhov Date: Wed, 29 Sep 2021 20:13:20 +0000 (+0100) Subject: [Project] Add constant iterators X-Git-Tag: 3.1~106 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=72450991eeb435a5bc1fa74da31fc862bb431e4a;p=rspamd.git [Project] Add constant iterators --- diff --git a/src/libmime/mime_string.cxx b/src/libmime/mime_string.cxx index 1785e9188..96f829574 100644 --- a/src/libmime/mime_string.cxx +++ b/src/libmime/mime_string.cxx @@ -20,34 +20,35 @@ #include "unicode/uchar.h" TEST_SUITE("mime_string") { +using namespace rspamd::mime; TEST_CASE("mime_string unfiltered ctors") { SUBCASE("empty") { - rspamd::mime_string st; + mime_string st; CHECK(st.size() == 0); CHECK(st == ""); } SUBCASE("unfiltered valid") { - rspamd::mime_string st{std::string_view("abcd")}; + mime_string st{std::string_view("abcd")}; CHECK(st == "abcd"); } SUBCASE("unfiltered zero character") { - rspamd::mime_string st{"abc\0d", 5}; + mime_string st{"abc\0d", 5}; CHECK(st.has_zeroes()); CHECK(st == "abcd"); } SUBCASE("unfiltered invalid character - middle") { - rspamd::mime_string st{std::string("abc\234d")}; + mime_string st{std::string("abc\234d")}; CHECK(st.has_invalid()); CHECK(st == "abc\uFFFDd"); } SUBCASE("unfiltered invalid character - end") { - rspamd::mime_string st{std::string("abc\234")}; + mime_string st{std::string("abc\234")}; CHECK(st.has_invalid()); CHECK(st == "abc\uFFFD"); } SUBCASE("unfiltered invalid character - start") { - rspamd::mime_string st{std::string("\234abc")}; + mime_string st{std::string("\234abc")}; CHECK(st.has_invalid()); CHECK(st == "\uFFFDabc"); } @@ -68,32 +69,47 @@ TEST_CASE("mime_string filtered ctors") }; SUBCASE("empty") { - rspamd::mime_string st{std::string_view(""), tolower_filter}; + mime_string st{std::string_view(""), tolower_filter}; CHECK(st.size() == 0); CHECK(st == ""); } SUBCASE("filtered valid") { - rspamd::mime_string st{std::string("AbCdУ"), tolower_filter}; + mime_string st{std::string("AbCdУ"), tolower_filter}; CHECK(st == "abcdу"); } SUBCASE("filtered invalid + filtered") { - rspamd::mime_string st{std::string("abcd\234\1"), print_filter}; + mime_string st{std::string("abcd\234\1"), print_filter}; CHECK(st == "abcd\uFFFD"); } } TEST_CASE("mime_string assign") { SUBCASE("assign from valid") { - rspamd::mime_string st; + mime_string st; CHECK(st.assign_if_valid(std::string("test"))); CHECK(st == "test"); } SUBCASE("assign from invalid") { - rspamd::mime_string st; + mime_string st; CHECK(!st.assign_if_valid(std::string("test\234t"))); CHECK(st == ""); } } + +TEST_CASE("mime_string iterators") +{ + + SUBCASE("unfiltered iterator ascii") { + auto in = std::string("abcd"); + mime_string st{in}; + CHECK(st == "abcd"); + + int i = 0; + for (auto &&c : st) { + CHECK(c == in[i++]); + } + } +} } \ No newline at end of file diff --git a/src/libmime/mime_string.hxx b/src/libmime/mime_string.hxx index c15dfd566..32eafde19 100644 --- a/src/libmime/mime_string.hxx +++ b/src/libmime/mime_string.hxx @@ -28,7 +28,7 @@ #include "unicode/utf8.h" #include "contrib/fastutf8/fastutf8.h" -namespace rspamd { +namespace rspamd::mime { /* * The motivation for another string is to have utf8 valid string replacing * all bad things with FFFFD replacement character and filtering \0 and other @@ -64,12 +64,227 @@ bool operator !(mime_string_flags fl) return fl == mime_string_flags::MIME_STRING_DEFAULT; } +// Codepoint iterator base class +template +struct iterator_base +{ + template + friend class basic_mime_string; + +public: + using value_type = typename Container::value_type; + using difference_type = typename Container::difference_type; + using codepoint_type = typename Container::codepoint_type; + using reference_type = codepoint_type; + using iterator_category = std::bidirectional_iterator_tag; + + bool operator==(const iterator_base &it) const noexcept + { + return idx == it.idx; + } + + bool operator!=(const iterator_base &it) const noexcept + { + return idx != it.idx; + } + + iterator_base(difference_type index, Container *instance) noexcept: + idx(index), cont_instance(instance) {} + iterator_base() noexcept = default; + iterator_base(const iterator_base &) noexcept = default; + + iterator_base &operator=(const iterator_base &) noexcept = default; + + Container *get_instance() const noexcept + { + return cont_instance; + } + + codepoint_type get_value() const noexcept { + auto i = idx; + codepoint_type uc; + U8_NEXT_UNSAFE(cont_instance->data(), i, uc); + return uc; + } + +protected: + difference_type idx; + Container* cont_instance = nullptr; +protected: + void advance(difference_type n) noexcept { + if (n > 0) { + U8_FWD_N_UNSAFE(cont_instance->data(), idx, n); + } + else if (n < 0) { + U8_BACK_N_UNSAFE(cont_instance->data(), idx, (-n)); + } + } + void increment() noexcept { + codepoint_type uc; + U8_NEXT_UNSAFE(cont_instance->data(), idx, uc); + } + + void decrement() noexcept { + codepoint_type uc; + U8_PREV_UNSAFE(cont_instance->data(), idx, uc); + } +}; + +// Partial spec for raw Byte-based iterator base +template +struct iterator_base +{ + template + friend class basic_string; + +public: + using value_type = typename Container::value_type; + using difference_type = typename Container::difference_type; + using reference_type = value_type; + using iterator_category = std::bidirectional_iterator_tag; + + bool operator==( const iterator_base& it ) const noexcept { return idx == it.idx; } + bool operator!=( const iterator_base& it ) const noexcept { return idx != it.idx; } + + iterator_base(difference_type index, Container *instance) noexcept: + idx(index), cont_instance(instance) {} + + iterator_base() noexcept = default; + iterator_base( const iterator_base& ) noexcept = default; + iterator_base& operator=( const iterator_base& ) noexcept = default; + Container* get_instance() const noexcept { return cont_instance; } + + value_type get_value() const noexcept { return cont_instance->storage.at(idx, std::nothrow); } +protected: + difference_type idx; + Container* cont_instance = nullptr; + +protected: + + //! Advance the iterator n times (negative values allowed!) + void advance( difference_type n ) noexcept { + idx += n; + } + + void increment() noexcept { idx ++; } + void decrement() noexcept { idx --; } +}; + +template struct iterator; +template struct const_iterator; + +template +struct iterator : iterator_base { + iterator(typename iterator_base::difference_type index, Container *instance) noexcept: + iterator_base(index, instance) + { + } + iterator() noexcept = default; + iterator(const iterator &) noexcept = default; + + iterator &operator=(const iterator &) noexcept = default; + /* Disallow creating from const_iterator */ + iterator(const const_iterator &) = delete; + + /* Prefix */ + iterator &operator++() noexcept + { + this->increment(); + return *this; + } + + /* Postfix */ + iterator operator++(int) noexcept + { + iterator tmp{this->idx, this->cont_instance}; + this->increment(); + return tmp; + } + + /* Prefix */ + iterator &operator--() noexcept + { + this->decrement(); + return *this; + } + + /* Postfix */ + iterator operator--(int) noexcept + { + iterator tmp{this->idx, this->cont_instance}; + this->decrement(); + return tmp; + } + + iterator operator+(typename iterator_base::difference_type n) const noexcept + { + iterator it{*this}; + it.advance(n); + return it; + } + + iterator &operator+=(typename iterator_base::difference_type n) noexcept + { + this->advance(n); + return *this; + } + + iterator operator-(typename iterator_base::difference_type n) const noexcept + { + iterator it{*this}; + it.advance(-n); + return it; + } + + iterator &operator-=(typename iterator_base::difference_type n) noexcept + { + this->advance(-n); + return *this; + } + + typename iterator::reference_type operator*() const noexcept + { + return this->get_value(); + } +}; + +template +struct const_iterator : iterator { + const_iterator(typename iterator_base::difference_type index, const Container *instance) noexcept: + iterator(index, const_cast(instance)) + { + } + + const_iterator(const iterator &other) noexcept: + iterator(other) + { + } + + const_iterator() noexcept = default; + + const_iterator(const const_iterator &) noexcept = default; + + const_iterator &operator=(const const_iterator &) noexcept = default; + + const typename iterator::reference_type operator*() const noexcept + { + return this->get_value(); + } +}; + template class basic_mime_string : private Allocator { public: using storage_type = std::basic_string, Allocator>; using view_type = std::basic_string_view>; using filter_type = fu2::function_view; + using codepoint_type = UChar32; + using value_type = T; + using difference_type = std::ptrdiff_t; + using iterator = rspamd::mime::iterator; + using const_iterator = rspamd::mime::const_iterator; + using raw_iterator = rspamd::mime::iterator; + using raw_const_iterator = rspamd::mime::const_iterator; /* Ctors */ basic_mime_string() noexcept : Allocator() {} explicit basic_mime_string(const Allocator& alloc) noexcept : Allocator(alloc) {} @@ -204,6 +419,47 @@ public: return false; } + inline iterator begin() noexcept + { + return {0, this}; + } + + inline const_iterator begin() const noexcept + { + return {0, this}; + } + + inline raw_iterator raw_begin() noexcept + { + return {0, this}; + } + + inline raw_const_iterator raw_begin() const noexcept + { + return {0, this}; + } + + inline iterator end() noexcept + { + return {(difference_type) size(), this}; + } + + inline const_iterator end() const noexcept + { + return {(difference_type) size(), this}; + } + + inline raw_iterator raw_end() noexcept + { + return {(difference_type) size(), this}; + } + + inline raw_const_iterator raw_end() const noexcept + { + return {(difference_type) size(), this}; + } + + /* For doctest stringify */ friend std::ostream& operator<< (std::ostream& os, const T& value) { os << value.storage; return os;