diff options
-rw-r--r-- | src/libserver/css/css_parser.cxx | 12 | ||||
-rw-r--r-- | src/libserver/css/css_tokeniser.cxx | 72 | ||||
-rw-r--r-- | src/libserver/css/css_tokeniser.hxx | 72 |
3 files changed, 106 insertions, 50 deletions
diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx index 207cfcb9d..68f03cdfa 100644 --- a/src/libserver/css/css_parser.cxx +++ b/src/libserver/css/css_parser.cxx @@ -94,16 +94,16 @@ bool css_parser::consume_input(const std::string_view &sv) css_tokeniser css_tokeniser(pool, sv); while (!eof) { - auto token_pair = css_tokeniser.next_token(); + auto next_token = css_tokeniser.next_token(); /* Top level parser */ - switch (token_pair.first) { - case css_parser_token::eof_token: + switch (next_token.type) { + case css_parser_token::token_type::eof_token: eof = true; break; - case css_parser_token::whitespace_token: - case css_parser_token::cdc_token: - case css_parser_token::cdo_token: + case css_parser_token::token_type::whitespace_token: + case css_parser_token::token_type::cdc_token: + case css_parser_token::token_type::cdo_token: /* Ignore tokens */ break; } diff --git a/src/libserver/css/css_tokeniser.cxx b/src/libserver/css/css_tokeniser.cxx index 40f202b01..058f7504e 100644 --- a/src/libserver/css/css_tokeniser.cxx +++ b/src/libserver/css/css_tokeniser.cxx @@ -19,8 +19,46 @@ namespace rspamd::css { +/* Helpers to create tokens */ -auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string_view> +/* + * This helper is intended to create tokens either with a tag and value + * or with just a tag. + */ +template<css_parser_token::token_type T, typename ...Args> +auto make_token(const Args&... args) -> css_parser_token; + +template<> +auto make_token<css_parser_token::token_type::string_token, std::string_view>(const std::string_view &s) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::string_token, s}; +} + +template<> +auto make_token<css_parser_token::token_type::whitespace_token, std::string_view>(const std::string_view &s) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::whitespace_token, s}; +} + +template<> +auto make_token<css_parser_token::token_type::delim_token, char>(const char &c) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::delim_token, c}; +} + +/* + * Generic tokens with no value (non-terminals) + */ +template<css_parser_token::token_type T> +auto make_token(void) -> css_parser_token +{ + return css_parser_token{T, css_parser_token_placeholder()}; +} + +auto css_tokeniser::next_token(void) -> struct css_parser_token { /* Helpers */ @@ -29,7 +67,7 @@ auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string * offset is set to the next character after a comment (or eof) * Nothing is returned */ - auto consume_comment = [this] () { + auto consume_comment = [this]() { auto i = offset; auto nested = 0; @@ -64,7 +102,7 @@ auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string * is set one character after the string. Css unescaping is done automatically * Accepts a quote char to find end of string */ - auto consume_string = [this] (auto quote_char) -> auto { + auto consume_string = [this](auto quote_char) -> auto { auto i = offset; bool need_unescape = false; @@ -122,8 +160,7 @@ auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string } else { offset = i + 1; - return std::make_pair (css_parser_token::delim_token, - std::string_view (&input[offset - 1], 1)); + return make_token<css_parser_token::token_type::delim_token>(c); } break; case ' ': @@ -136,48 +173,41 @@ auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string c = input[++i]; } while (i < input.size () && g_ascii_isspace (c)); - auto ret = std::make_pair (css_parser_token::whitespace_token, - std::string_view (&input[offset], i - offset)); + auto ret = make_token<css_parser_token::token_type::whitespace_token>( + std::string_view(&input[offset], i - offset)); offset = i; return ret; } case '"': case '\'': offset = i + 1; - return std::make_pair (css_parser_token::string_token, - consume_string (c)); + return make_token<css_parser_token::token_type::string_token>(consume_string(c)); case '(': offset = i + 1; - return std::make_pair (css_parser_token::obrace_token, - std::string_view (&input[offset - 1], 1)); + return make_token<css_parser_token::token_type::obrace_token>(); case ')': offset = i + 1; - return std::make_pair (css_parser_token::ebrace_token, - std::string_view (&input[offset - 1], 1)); + return make_token<css_parser_token::token_type::ebrace_token>(); case ',': - offset = i + 1; - return std::make_pair (css_parser_token::comma_token, - std::string_view (&input[offset - 1], 1)); + return make_token<css_parser_token::token_type::comma_token>(); case '<': /* Maybe an xml like comment */ if (i + 3 < input.size () && input[i + 1] == '!' && input[i + 2] == '-' && input[i + 3] == '-') { offset += 3; - return std::make_pair (css_parser_token::cdo_token, - std::string_view (&input[offset - 3], 3)); + return make_token<css_parser_token::token_type::cdo_token>(); } else { offset = i + 1; - return std::make_pair (css_parser_token::delim_token, - std::string_view (&input[offset - 1], 1)); + return make_token<css_parser_token::token_type::delim_token>(c); } break; } } - return std::make_pair (css_parser_token::eof_token, std::string_view ()); + return make_token<css_parser_token::token_type::eof_token>(); } }
\ No newline at end of file diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx index 4c6824389..cff5877c2 100644 --- a/src/libserver/css/css_tokeniser.hxx +++ b/src/libserver/css/css_tokeniser.hxx @@ -21,41 +21,67 @@ #include <string_view> #include <utility> +#include <variant> #include "mem_pool.h" namespace rspamd::css { -enum class css_parser_token { - whitespace_token, - ident_token, - function_token, - at_keyword_token, - hash_token, - string_token, - number_token, - url_token, - dimension_token, - percentage_token, - cdo_token, /* xml open comment */ - cdc_token, /* xml close comment */ - delim_token, - obrace_token, /* ( */ - ebrace_token, /* ) */ - osqbrace_token, /* [ */ - esqbrace_token, /* ] */ - comma_token, - colon_token, - semicolon_token, - eof_token, +struct css_parser_token_placeholder {}; /* For empty tokens */ + +struct css_parser_token { + enum class token_type : std::uint8_t { + whitespace_token, + ident_token, + function_token, + at_keyword_token, + hash_token, + string_token, + number_token, + url_token, + dimension_token, + percentage_token, + cdo_token, /* xml open comment */ + cdc_token, /* xml close comment */ + delim_token, + obrace_token, /* ( */ + ebrace_token, /* ) */ + osqbrace_token, /* [ */ + esqbrace_token, /* ] */ + comma_token, + colon_token, + semicolon_token, + eof_token, + }; + + static const std::uint8_t default_flags = 0; + static const std::uint8_t flag_bad_string = (1u << 0u); + using value_type = std::variant<std::string_view, /* For strings and string like tokens */ + char, /* For delimiters (might need to move to unicode point) */ + double, /* For numeric stuff */ + css_parser_token_placeholder /* For general no token stuff */ + >; + + /* Typed storage */ + value_type value; + token_type type; + std::uint8_t flags = default_flags; + + css_parser_token() = delete; + explicit css_parser_token(token_type type, const value_type &value) : + value(value), type(type) {} }; +/* Ensure that parser tokens are simple enough */ +static_assert(std::is_trivially_copyable_v<css_parser_token>); + class css_tokeniser { public: css_tokeniser() = delete; css_tokeniser(rspamd_mempool_t *pool, const std::string_view &sv) : input(sv), offset(0), pool(pool) {} - auto next_token(void) -> std::pair<css_parser_token, std::string_view>; + auto next_token(void) -> struct css_parser_token; + auto get_offset(void) const { return offset; } private: std::string_view input; std::size_t offset; |