From ab34f8889570acb0eb7e687c48586925cb6f8616 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Tue, 2 Feb 2021 18:18:45 +0000
Subject: [PATCH] [Project] Css: Projected a parser

---
 src/libserver/css/css_parser.cxx    | 144 ++++++++++++++++++++++++++--
 src/libserver/css/css_tokeniser.hxx |   1 +
 src/libserver/css/parse_error.hxx   |   1 +
 3 files changed, 137 insertions(+), 9 deletions(-)

diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
index 54ccccd23..2133a7b36 100644
--- a/src/libserver/css/css_parser.cxx
+++ b/src/libserver/css/css_parser.cxx
@@ -16,11 +16,64 @@
 
 #include "css_parser.hxx"
 #include "css_tokeniser.hxx"
+#include
 #include
 
 namespace rspamd::css {
 
+/*
+ * A block consumed by the parser: a tag naming the kind of block plus a
+ * variant with its content. Going by the constructors and attach_block,
+ * the content is either empty, a list of attached child blocks or a
+ * single lexer token.
+ * NOTE(review): the template arguments of unique_ptr, variant, vector and
+ * get look stripped in this copy of the patch; attach_block treats
+ * variant index 0 as the empty (monostate) state and index 2 as a single
+ * token - confirm against the real file.
+ */
+struct css_consumed_block {
+    enum class parser_tag_type : std::uint8_t { /* kind of the consumed block */
+        css_top_block,
+        css_qualified_rule,
+        css_at_rule,
+        css_simple_block,
+        css_function,
+        css_component
+    };
+
+    using consumed_block_ptr = std::unique_ptr; /* owning pointer used for attached blocks */
+
+    parser_tag_type tag;
+    std::variant,
+            css_parser_token> content;
+
+    css_consumed_block() = delete; /* a block always needs a tag */
+
+    css_consumed_block(parser_tag_type tag) : tag(tag) { /* tag-only block; content
+         * is left in its default state unless the tag is one of the three
+         * compound kinds tested below */
+        if (tag == parser_tag_type::css_top_block ||
+            tag == parser_tag_type::css_qualified_rule ||
+            tag == parser_tag_type::css_simple_block) {
+            /* Pre-allocate content for known vector blocks
+             * NOTE(review): vector(4) normally creates four default
+             * elements instead of reserving room for four, and attach_block
+             * appends after them - confirm this is intended */
+            content = std::vector(4);
+        }
+    }
+    /* Construct a block from a single lexer token (for trivial blocks);
+     * the token is moved into content */
+    explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok) :
+            tag(tag), content(std::move(tok)) {}
+
+    /* Append block to this compound block, taking ownership of it.
+     * Returns false, leaving block untouched, when this block holds a
+     * single token (variant index 2); returns true otherwise. */
+    auto attach_block(consumed_block_ptr &&block) -> bool {
+        if (content.index() == 0) {
+            /* Switch from monostate to the vector alternative
+             * NOTE(review): vector(1) presumably leaves one default element
+             * in front of the attached block - confirm */
+            content = std::vector(1);
+        }
+        else if (content.index() == 2) {
+            /* A single component, cannot attach a block ! */
+            return false;
+        }
+
+        std::get>(content)
+                .push_back(std::move(block));
+
+        return true;
+    }
+};
+
 class css_parser {
 public:
     css_parser(void) = delete; /* Require mempool to be set for logging */
@@ -31,10 +84,10 @@ public:
     auto get_object_maybe(void) -> tl::expected, css_parse_error> {
         if (state == parser_state::parse_done) {
             state = parser_state::initial_state;
-            return std::move (style_object);
+            return std::move(style_object);
         }
 
-        return tl::make_unexpected (error);
+        return tl::make_unexpected(error);
     }
 
 private:
@@ -93,17 +146,90 @@ bool css_parser::consume_input(const std::string_view &sv)
     bool eof = false;
     css_tokeniser css_tokeniser(pool, sv);
 
-    while (!eof) {
+    auto consumed_blocks = std::make_unique(
+            css_consumed_block::parser_tag_type::css_top_block); /* root block handed to the consumers */
+    auto rec_level = 0; /* current consumer nesting depth */
+    const auto max_rec = 20; /* nesting limit; going past it fails with PARSE_ERROR_BAD_NESTING */
+
+    auto component_value_consumer = [&](std::unique_ptr &top) -> bool { /* stub so far:
+         * reads one token and does nothing with it (the switch is empty);
+         * top is unused. Returns false only when the nesting limit is hit. */
+
+        if (++rec_level > max_rec) {
+            error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+            return false; /* NOTE(review): rec_level is not decremented on this path */
+        }
+
         auto next_token = css_tokeniser.next_token();
 
-        /* Top level parser */
         switch (next_token.type) {
-        case css_parser_token::token_type::eof_token:
-            eof = true;
+
+        }
+
+        --rec_level;
+
+        return true;
+    };
+
+    auto qualified_rule_consumer = [&](std::unique_ptr &top) -> bool { /*
+         * Reads tokens until eof or a failure while building a qualified
+         * rule block; the block is attached to top only when top is tagged
+         * as the top block. Returns false on failure, true otherwise. */
+        if (++rec_level > max_rec) {
+            msg_err_css("max nesting reached, ignore style");
+            error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+            return false; /* NOTE(review): rec_level is not decremented on this path */
+        }
+
+        auto ret = true;
+        auto block = std::make_unique(
+                css_consumed_block::parser_tag_type::css_qualified_rule); /* block built for this rule */
+
+        while (ret && !eof) {
+            auto &&next_token = css_tokeniser.next_token();
+            switch (next_token.type) {
+            case css_parser_token::token_type::eof_token:
+                eof = true; /* also ends the outer loop in consume_input */
+                break;
+            case css_parser_token::token_type::ident_token:
+            case css_parser_token::token_type::hash_token:
+                /* Consume allowed complex tokens as a rule preamble
+                 * NOTE(review): the token read above is not forwarded; the
+                 * component consumer reads the next token itself */
+                ret = component_value_consumer(block);
+                break;
+            case css_parser_token::token_type::cdo_token:
+            case css_parser_token::token_type::cdc_token:
+                if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+                    /* Ignore */
+                    ret = true;
+                }
+                else { /* not the top block: nothing is done here yet */
+
+                }
+                break;
+            }; /* no default case: every other token type is skipped */
+        }
+
+        if (ret) {
+            if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+                top->attach_block(std::move(block)); /* the result of attach_block is not checked */
+            }
+        }
+
+        --rec_level;
+
+        return ret;
+    };
+
+    auto get_parser_consumer = [&]() -> auto { /* picks the consumer lambda for the
+         * current parser state. NOTE(review): only initial_state returns a
+         * value; any other state reaches the end of the lambda without a
+         * return statement - confirm that cannot happen */
+        switch (state) {
+        case parser_state::initial_state:
+            /* Top level qualified parser */
+            return qualified_rule_consumer;
             break;
-        default:
-            /* Ignore tokens */
-            msg_debug_css("got token: %s", next_token.debug_token_str().c_str());
+        }
+    };
+
+    while (!eof) {
+        /* Pick the consumer lambda for the current parser state and run it
+         * on the root block; stop at the first failure */
+
+        auto consumer = get_parser_consumer();
+
+        if (!consumer(consumed_blocks)) {
             break;
         }
     }
diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx
index b2da88500..7ef5f4643 100644
--- a/src/libserver/css/css_tokeniser.hxx
+++ b/src/libserver/css/css_tokeniser.hxx
@@ -90,6 +90,7 @@ struct css_parser_token {
     css_parser_token() = delete;
     explicit css_parser_token(token_type type, const value_type &value)
             : value(value), type(type) {}
+    css_parser_token(css_parser_token &&other) = default; /* defaulted move constructor; presumably added so a token can be moved into css_consumed_block */
     auto adjust_dim(const css_parser_token &dim_token) -> bool;
 
     /* Debugging routines */
diff --git a/src/libserver/css/parse_error.hxx b/src/libserver/css/parse_error.hxx
index 0a2cbc750..458469afc 100644
--- a/src/libserver/css/parse_error.hxx
+++ b/src/libserver/css/parse_error.hxx
@@ -30,6 +30,7 @@ namespace rspamd::css {
 enum class css_parse_error_type {
     PARSE_ERROR_UNKNOWN_OPTION,
     PARSE_ERROR_INVALID_SYNTAX,
+    PARSE_ERROR_BAD_NESTING, /* set by the parser consumers when the nesting limit is exceeded */
     PARSE_ERROR_NYI,
     PARSE_ERROR_UNKNOWN_ERROR,
 };
-- 
2.39.5