diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-02-02 18:18:45 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-02-02 18:18:45 +0000 |
commit | ab34f8889570acb0eb7e687c48586925cb6f8616 (patch) | |
tree | ae9c3097613dabb48332dd661b15131a2c50c1c2 /src/libserver | |
parent | 957e841f1d7d045a33f5d363fe81f33370190b20 (diff) | |
download | rspamd-ab34f8889570acb0eb7e687c48586925cb6f8616.tar.gz rspamd-ab34f8889570acb0eb7e687c48586925cb6f8616.zip |
[Project] Css: Projected a parser
Diffstat (limited to 'src/libserver')
-rw-r--r-- | src/libserver/css/css_parser.cxx | 144 | ||||
-rw-r--r-- | src/libserver/css/css_tokeniser.hxx | 1 | ||||
-rw-r--r-- | src/libserver/css/parse_error.hxx | 1 |
3 files changed, 137 insertions, 9 deletions
diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx index 54ccccd23..2133a7b36 100644 --- a/src/libserver/css/css_parser.cxx +++ b/src/libserver/css/css_parser.cxx @@ -16,11 +16,64 @@ #include "css_parser.hxx" #include "css_tokeniser.hxx" +#include <vector> #include <unicode/utf8.h> namespace rspamd::css { +/* + * Represents a block consumed by the parser: content is either empty (monostate), a vector of owned child blocks, or a single css_parser_token + */ +struct css_consumed_block { + enum class parser_tag_type : std::uint8_t { + css_top_block, + css_qualified_rule, + css_at_rule, + css_simple_block, + css_function, + css_component + }; + + using consumed_block_ptr = std::unique_ptr<css_consumed_block>; + + parser_tag_type tag; + std::variant<std::monostate, + std::vector<consumed_block_ptr>, + css_parser_token> content; + + css_consumed_block() = delete; + + css_consumed_block(parser_tag_type tag) : tag(tag) { + if (tag == parser_tag_type::css_top_block || + tag == parser_tag_type::css_qualified_rule || + tag == parser_tag_type::css_simple_block) { + /* Top-level, qualified-rule and simple blocks start with vector content. NOTE(review): std::vector(4) constructs four null entries instead of reserving capacity - confirm this is intended */ + content = std::vector<consumed_block_ptr>(4); + } + } + /* Build a block whose content is a single lexer token, moved out of tok */ + explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok) : + tag(tag), content(std::move(tok)) {} + + /* Append block to the vector content, taking ownership of it; returns false when content already holds a single token, true otherwise */ + auto attach_block(consumed_block_ptr &&block) -> bool { + if (content.index() == 0) { + /* Content is still empty: switch it to a vector. NOTE(review): vector(1) already holds one null entry before the push_back below - confirm this is intended */ + content = std::vector<consumed_block_ptr>(1); + } + else if (content.index() == 2) { + /* Content is a single token, so a block cannot be attached
*/ + return false; + } + + std::get<std::vector<consumed_block_ptr>>(content) + .push_back(std::move(block)); + + return true; + } +}; + class css_parser { public: css_parser(void) = delete; /* Require mempool to be set for logging */ @@ -31,10 +84,10 @@ public: auto get_object_maybe(void) -> tl::expected<std::unique_ptr<css_style_sheet>, css_parse_error> { if (state == parser_state::parse_done) { state = parser_state::initial_state; - return std::move (style_object); + return std::move(style_object); } - return tl::make_unexpected (error); + return tl::make_unexpected(error); } private: @@ -93,17 +146,90 @@ bool css_parser::consume_input(const std::string_view &sv) bool eof = false; css_tokeniser css_tokeniser(pool, sv); - while (!eof) { + auto consumed_blocks = std::make_unique<css_consumed_block>( + css_consumed_block::parser_tag_type::css_top_block); + auto rec_level = 0; + const auto max_rec = 20; + + /* Placeholder consumer: checks the nesting limit, reads one token and switches on its type with no cases handled yet */ auto component_value_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool { + + if (++rec_level > max_rec) { + error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING); + return false; + } + auto next_token = css_tokeniser.next_token(); - /* Top level parser */ switch (next_token.type) { - case css_parser_token::token_type::eof_token: - eof = true; + + } + + --rec_level; + + return true; + }; + + /* Reads tokens into a new css_qualified_rule block until eof or a failed sub-consumer; on success the block is attached only when top is the top-level block */ auto qualified_rule_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool { + if (++rec_level > max_rec) { + msg_err_css("max nesting reached, ignore style"); + error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING); + return false; + } + + auto ret = true; + auto block = std::make_unique<css_consumed_block>( + css_consumed_block::parser_tag_type::css_qualified_rule); + + while (ret && !eof) { + auto &&next_token = css_tokeniser.next_token(); + switch (next_token.type) { + case css_parser_token::token_type::eof_token: + eof = true; + break; + case css_parser_token::token_type::ident_token: + case 
css_parser_token::token_type::hash_token: + /* ident and hash tokens start the rule preamble. NOTE(review): the consumer below fetches its own next token, so the token matched here is not stored anywhere - confirm this is intended */ + ret = component_value_consumer(block); + break; + case css_parser_token::token_type::cdo_token: + case css_parser_token::token_type::cdc_token: + if (top->tag == css_consumed_block::parser_tag_type::css_top_block) { + /* CDO and CDC tokens are ignored at the top level */ + ret = true; + } + else { + + } + break; + }; + } + + if (ret) { + if (top->tag == css_consumed_block::parser_tag_type::css_top_block) { + top->attach_block(std::move(block)); + } + } + + --rec_level; + + return ret; + }; + + auto get_parser_consumer = [&]() -> auto { + switch (state) { + case parser_state::initial_state: + /* Top level qualified parser. NOTE(review): no other state (parse_done for instance) returns a value, so control can fall off the end of this lambda - confirm every parser_state is handled */ + return qualified_rule_consumer; break; - default: - /* Ignore tokens */ - msg_debug_css("got token: %s", next_token.debug_token_str().c_str()); + } + }; + + while (!eof) { + /* Pick the consumer lambda for the current parser state and run it on the top block; stop as soon as it reports failure */ + + auto consumer = get_parser_consumer(); + + if (!consumer(consumed_blocks)) { break; } } diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx index b2da88500..7ef5f4643 100644 --- a/src/libserver/css/css_tokeniser.hxx +++ b/src/libserver/css/css_tokeniser.hxx @@ -90,6 +90,7 @@ struct css_parser_token { css_parser_token() = delete; explicit css_parser_token(token_type type, const value_type &value) : value(value), type(type) {} + css_parser_token(css_parser_token &&other) = default; auto adjust_dim(const css_parser_token &dim_token) -> bool; /* Debugging routines */ diff --git a/src/libserver/css/parse_error.hxx b/src/libserver/css/parse_error.hxx index 0a2cbc750..458469afc 100644 --- a/src/libserver/css/parse_error.hxx +++ b/src/libserver/css/parse_error.hxx @@ -30,6 +30,7 @@ namespace rspamd::css { enum class css_parse_error_type { PARSE_ERROR_UNKNOWN_OPTION, PARSE_ERROR_INVALID_SYNTAX, + PARSE_ERROR_BAD_NESTING, PARSE_ERROR_NYI, PARSE_ERROR_UNKNOWN_ERROR, }; |