diff options
Diffstat (limited to 'src/libserver/css')
-rw-r--r-- | src/libserver/css/css_parser.cxx | 254 | ||||
-rw-r--r-- | src/libserver/css/css_parser.hxx | 118 | ||||
-rw-r--r-- | src/libserver/css/css_property.cxx | 36 | ||||
-rw-r--r-- | src/libserver/css/css_property.hxx | 8 | ||||
-rw-r--r-- | src/libserver/css/css_rule.cxx | 93 | ||||
-rw-r--r-- | src/libserver/css/css_rule.hxx | 6 | ||||
-rw-r--r-- | src/libserver/css/css_selector.cxx | 2 | ||||
-rw-r--r-- | src/libserver/css/css_selector.hxx | 5 | ||||
-rw-r--r-- | src/libserver/css/css_tokeniser.cxx | 3 | ||||
-rw-r--r-- | src/libserver/css/css_tokeniser.hxx | 9 | ||||
-rw-r--r-- | src/libserver/css/css_value.cxx | 4 | ||||
-rw-r--r-- | src/libserver/css/css_value.hxx | 4 |
12 files changed, 355 insertions, 187 deletions
diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx index 1a9231700..e4a8159f1 100644 --- a/src/libserver/css/css_parser.cxx +++ b/src/libserver/css/css_parser.cxx @@ -24,181 +24,97 @@ namespace rspamd::css { -struct css_consumed_block; -/* - * Represents a consumed token by a parser - */ -struct css_consumed_block { - enum class parser_tag_type : std::uint8_t { - css_top_block, - css_qualified_rule, - css_at_rule, - css_simple_block, - css_function, - css_function_arg, - css_component, - css_selector, - }; - - using consumed_block_ptr = std::unique_ptr<css_consumed_block>; - - parser_tag_type tag; - std::variant<std::monostate, - std::vector<consumed_block_ptr>, - css_parser_token> content; - - css_consumed_block() = delete; - - css_consumed_block(parser_tag_type tag) : tag(tag) { - if (tag == parser_tag_type::css_top_block || - tag == parser_tag_type::css_qualified_rule || - tag == parser_tag_type::css_simple_block) { - /* Pre-allocate content for known vector blocks */ - std::vector<consumed_block_ptr> vec; - vec.reserve(4); - content = std::move(vec); - } - } - /* Construct a block from a single lexer token (for trivial blocks) */ - explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok) : - tag(tag), content(std::move(tok)) {} - - /* Attach a new block to the compound block, consuming block inside */ - auto attach_block(consumed_block_ptr &&block) -> bool { - if (content.index() == 0) { - /* Switch from monostate */ - content = std::vector<consumed_block_ptr>(); - } - else if (content.index() == 2) { - /* A single component, cannot attach a block ! */ - return false; - } - - auto &value_vec = std::get<std::vector<consumed_block_ptr>>(content); - value_vec.push_back(std::move(block)); +const css_consumed_block css_parser_eof_block{}; - return true; +auto css_consumed_block::attach_block(consumed_block_ptr &&block) -> bool { + if (content.index() == 0) { + /* Switch from monostate */ + content = std::vector<consumed_block_ptr>(); } - - auto assign_token(css_parser_token &&tok) -> void - { - content = std::move(tok); - } - - /* Empty blocks used to avoid type checks in loops */ - const inline static std::vector<consumed_block_ptr> empty_block_vec{}; - - auto get_blocks_or_empty() const -> const std::vector<consumed_block_ptr>& { - if (content.index() == 1) { - return std::get<std::vector<consumed_block_ptr>>(content); - } - - return empty_block_vec; - } - - auto get_token_or_empty() const -> const css_parser_token& { - if (content.index() == 2) { - return std::get<css_parser_token>(content); - } - - return css_parser_eof_token(); + else if (content.index() == 2) { + /* A single component, cannot attach a block ! */ + return false; } - auto token_type_str(void) const -> const char * - { - const auto *ret = ""; + auto &value_vec = std::get<std::vector<consumed_block_ptr>>(content); + value_vec.push_back(std::move(block)); - switch(tag) { - case parser_tag_type::css_top_block: - ret = "top"; - break; - case parser_tag_type::css_qualified_rule: - ret = "qualified rule"; - break; - case parser_tag_type::css_at_rule: - ret = "at rule"; - break; - case parser_tag_type::css_simple_block: - ret = "simple block"; - break; - case parser_tag_type::css_function: - ret = "function"; - break; - case parser_tag_type::css_function_arg: - ret = "function args"; - break; - case parser_tag_type::css_component: - ret = "component"; - break; - case parser_tag_type::css_selector: - ret = "selector"; - break; - } + return true; +} - return ret; +auto css_consumed_block::token_type_str(void) const -> const char * +{ + const auto *ret = ""; + + switch(tag) { + case parser_tag_type::css_top_block: + ret = "top"; + break; + case parser_tag_type::css_qualified_rule: + ret = "qualified rule"; + break; + case parser_tag_type::css_at_rule: + ret = "at rule"; + break; + case parser_tag_type::css_simple_block: + ret = "simple block"; + break; + case parser_tag_type::css_function: + ret = "function"; + break; + case parser_tag_type::css_function_arg: + ret = "function args"; + break; + case parser_tag_type::css_component: + ret = "component"; + break; + case parser_tag_type::css_selector: + ret = "selector"; + break; + case parser_tag_type::css_eof_block: + ret = "eof"; + break; } - auto size() const -> std::size_t { - auto ret = 0; - - std::visit([&](auto& arg) { - using T = std::decay_t<decltype(arg)>; - - if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) { - /* Array of blocks */ - ret = arg.size(); - } - else if constexpr (std::is_same_v<T, std::monostate>) { - /* Empty block */ - ret = 0; - } - else { - /* Single element block */ - ret = 1; - } - }, - content); + return ret; +} - return ret; - } +auto css_consumed_block::debug_str(void) -> std::string { + std::string ret = std::string(R"("type": ")") + token_type_str() + "\""; - auto debug_str(void) -> std::string { - std::string ret = std::string("\"type\": \"") + token_type_str() + "\""; + ret += ", \"value\": "; - ret += ", \"value\": "; + std::visit([&](auto& arg) { + using T = std::decay_t<decltype(arg)>; - std::visit([&](auto& arg) { - using T = std::decay_t<decltype(arg)>; + if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) { + /* Array of blocks */ + ret += "["; + for (const auto &block : arg) { + ret += "{"; + ret += block->debug_str(); + ret += "}, "; + } - if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) { - /* Array of blocks */ - ret += "["; - for (const auto &block : arg) { - ret += "{"; - ret += block->debug_str(); - ret += "}, "; + if (*(--ret.end()) == ' ') { + ret.pop_back(); + ret.pop_back(); /* Last ',' */ + } + ret += "]"; } - - if (*(--ret.end()) == ' ') { - ret.pop_back(); - ret.pop_back(); /* Last ',' */ + else if constexpr (std::is_same_v<T, std::monostate>) { + /* Empty block */ + ret += R"("empty")"; } - ret += "]"; - } - else if constexpr (std::is_same_v<T, std::monostate>) { - /* Empty block */ - ret += "\"empty\""; - } - else { - /* Single element block */ - ret += "\"" + arg.debug_token_str() + "\""; - } - }, - content); + else { + /* Single element block */ + ret += "\"" + arg.debug_token_str() + "\""; + } + }, + content); - return ret; - } -}; + return ret; +} class css_parser { public: @@ -622,18 +538,18 @@ bool css_parser::consume_input(const std::string_view &sv) auto selector_it = children.cbegin(); auto selector_token_functor = [&selector_it,&simple_block](void) - -> const css_parser_token & { + -> const css_consumed_block & { for (;;) { if (selector_it == simple_block) { - return css_parser_eof_token(); + return css_parser_eof_block; } - const auto &ret = (*selector_it)->get_token_or_empty(); + const auto &ret = (*selector_it); ++selector_it; - if (ret.type != css_parser_token::token_type::eof_token) { - return ret; + if (ret->get_token_or_empty().type != css_parser_token::token_type::eof_token) { + return *ret; } } }; @@ -643,18 +559,18 @@ bool css_parser::consume_input(const std::string_view &sv) auto decls_it = (*simple_block)->get_blocks_or_empty().cbegin(); auto decls_end = (*simple_block)->get_blocks_or_empty().cend(); auto declaration_token_functor = [&decls_it,&decls_end](void) - -> const css_parser_token & { + -> const css_consumed_block & { for (;;) { if (decls_it == decls_end) { - return css_parser_eof_token(); + return css_parser_eof_block; } - const auto &ret = (*decls_it)->get_token_or_empty(); + const auto &ret = (*decls_it); ++decls_it; - if (ret.type != css_parser_token::token_type::eof_token) { - return ret; + if (ret->get_token_or_empty().type != css_parser_token::token_type::eof_token) { + return *ret; } } }; diff --git a/src/libserver/css/css_parser.hxx b/src/libserver/css/css_parser.hxx index 2f10f994e..de982525a 100644 --- a/src/libserver/css/css_parser.hxx +++ b/src/libserver/css/css_parser.hxx @@ -19,13 +19,131 @@ #ifndef RSPAMD_CSS_PARSER_HXX #define RSPAMD_CSS_PARSER_HXX +#include <variant> +#include <vector> +#include <memory> +#include <string> + +#include "css_tokeniser.hxx" #include "css.hxx" #include "parse_error.hxx" #include "contrib/expected/expected.hpp" #include "logger.h" + namespace rspamd::css { +/* + * Represents a consumed token by a parser + */ +class css_consumed_block { +public: + enum class parser_tag_type : std::uint8_t { + css_top_block, + css_qualified_rule, + css_at_rule, + css_simple_block, + css_function, + css_function_arg, + css_component, + css_selector, + css_eof_block, + }; + using consumed_block_ptr = std::unique_ptr<css_consumed_block>; + + css_consumed_block() : tag(parser_tag_type::css_eof_block) {} + css_consumed_block(parser_tag_type tag) : tag(tag) { + if (tag == parser_tag_type::css_top_block || + tag == parser_tag_type::css_qualified_rule || + tag == parser_tag_type::css_simple_block) { + /* Pre-allocate content for known vector blocks */ + std::vector<consumed_block_ptr> vec; + vec.reserve(4); + content = std::move(vec); + } + } + /* Construct a block from a single lexer token (for trivial blocks) */ + explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok) : + tag(tag), content(std::move(tok)) {} + + /* Attach a new block to the compound block, consuming block inside */ + auto attach_block(consumed_block_ptr &&block) -> bool; + + auto assign_token(css_parser_token &&tok) -> void { + content = std::move(tok); + } + + /* Empty blocks used to avoid type checks in loops */ + const inline static std::vector<consumed_block_ptr> empty_block_vec{}; + + auto is_blocks_vec() const -> bool { + return (content.index() == 1); + } + + auto get_blocks_or_empty() const -> const std::vector<consumed_block_ptr>& { + if (is_blocks_vec()) { + return std::get<std::vector<consumed_block_ptr>>(content); + } + + return empty_block_vec; + } + + auto is_token() const -> bool { + return (content.index() == 2); + } + + auto get_token_or_empty() const -> const css_parser_token& { + if (is_token()) { + return std::get<css_parser_token>(content); + } + + return css_parser_eof_token(); + } + + auto size() const -> std::size_t { + auto ret = 0; + + std::visit([&](auto& arg) { + using T = std::decay_t<decltype(arg)>; + + if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) { + /* Array of blocks */ + ret = arg.size(); + } + else if constexpr (std::is_same_v<T, std::monostate>) { + /* Empty block */ + ret = 0; + } + else { + /* Single element block */ + ret = 1; + } + }, + content); + + return ret; + } + + auto is_eof() -> bool { + return tag == parser_tag_type::css_eof_block; + } + + /* Debug methods */ + auto token_type_str(void) const -> const char *; + auto debug_str(void) -> std::string; + +public: + parser_tag_type tag; +private: + std::variant<std::monostate, + std::vector<consumed_block_ptr>, + css_parser_token> content; +}; + +extern const css_consumed_block css_parser_eof_block; + +using blocks_gen_functor = std::function<const css_consumed_block &(void)>; + auto parse_css (rspamd_mempool_t *pool, const std::string_view &st) -> tl::expected<std::unique_ptr<css_style_sheet>,css_parse_error>; diff --git a/src/libserver/css/css_property.cxx b/src/libserver/css/css_property.cxx index 98543f75a..77927d724 100644 --- a/src/libserver/css/css_property.cxx +++ b/src/libserver/css/css_property.cxx @@ -15,12 +15,44 @@ */ #include "css_property.hxx" - +#include "frozen/unordered_map.h" +#include "frozen/string.h" namespace rspamd::css { -auto css_property::from_bytes (const char *input, size_t inlen) -> tl::expected<css_property,css_parse_error> +constexpr const auto max_type = static_cast<int>(css_property_type::PROPERTY_NYI); +constexpr frozen::unordered_map<frozen::string, css_property_type, max_type> type_map{ + {"font", css_property_type::PROPERTY_FONT}, + {"color", css_property_type::PROPERTY_COLOR}, + {"bgcolor", css_property_type::PROPERTY_BGCOLOR}, + {"background", css_property_type::PROPERTY_BACKGROUND}, + {"height", css_property_type::PROPERTY_HEIGHT}, + {"width", css_property_type::PROPERTY_WIDTH}, + {"display", css_property_type::PROPERTY_DISPLAY}, + {"visibility", css_property_type::PROPERTY_VISIBILITY}, +}; + +auto token_string_to_property(const std::string_view &inp) -> css_property_type { + + css_property_type ret = css_property_type::PROPERTY_NYI; + + auto known_type = type_map.find(inp); + + if (known_type != type_map.end()) { + ret = known_type->second; + } + + return ret; +} + +auto css_property::from_token(const css_parser_token &tok) -> tl::expected<css_property,css_parse_error> { + if (tok.type == css_parser_token::token_type::ident_token) { + auto sv = tok.get_string_or_default(""); + + return css_property{token_string_to_property(sv)}; + } + return tl::unexpected{css_parse_error(css_parse_error_type::PARSE_ERROR_NYI)}; } diff --git a/src/libserver/css/css_property.hxx b/src/libserver/css/css_property.hxx index 2e668c640..562e54894 100644 --- a/src/libserver/css/css_property.hxx +++ b/src/libserver/css/css_property.hxx @@ -19,6 +19,7 @@ #define RSPAMD_CSS_PROPERTY_HXX #include <string> +#include "css_tokeniser.hxx" #include "parse_error.hxx" #include "contrib/expected/expected.hpp" @@ -29,7 +30,7 @@ namespace rspamd::css { * point of view */ enum class css_property_type { - PROPERTY_FONT, + PROPERTY_FONT = 0, PROPERTY_COLOR, PROPERTY_BGCOLOR, PROPERTY_BACKGROUND, @@ -37,12 +38,13 @@ enum class css_property_type { PROPERTY_WIDTH, PROPERTY_DISPLAY, PROPERTY_VISIBILITY, + PROPERTY_NYI, }; struct css_property { css_property_type type; - static tl::expected<css_property,css_parse_error> from_bytes (const char *input, - size_t inlen); + static tl::expected<css_property,css_parse_error> from_token( + const css_parser_token &tok); }; diff --git a/src/libserver/css/css_rule.cxx b/src/libserver/css/css_rule.cxx index 44148b01a..cb0d4abad 100644 --- a/src/libserver/css/css_rule.cxx +++ b/src/libserver/css/css_rule.cxx @@ -19,10 +19,101 @@ namespace rspamd::css { auto process_declaration_tokens(rspamd_mempool_t *pool, - const tokeniser_gen_functor &next_token_functor) + const blocks_gen_functor &next_block_functor) -> declarations_vec { declarations_vec ret; + bool can_continue = true; + css_property cur_property{css_property_type::PROPERTY_NYI}; + static const css_property bad_property{css_property_type::PROPERTY_NYI}; + std::unique_ptr<css_rule> cur_rule; + + enum { + parse_property, + parse_value, + ignore_value, /* For unknown properties */ + } state = parse_property; + + while (can_continue) { + const auto &next_tok = next_block_functor(); + + switch (next_tok.tag) { + case css_consumed_block::parser_tag_type::css_component: + if (state == parse_property) { + cur_property = css_property::from_token(next_tok.get_token_or_empty()) + .value_or(bad_property); + + if (cur_property.type == css_property_type::PROPERTY_NYI) { + state = ignore_value; + /* Ignore everything till ; */ + continue; + } + + /* We now expect colon block */ + const auto &expect_colon_block = next_block_functor(); + + if (expect_colon_block.tag != css_consumed_block::parser_tag_type::css_component) { + + state = ignore_value; /* Ignore up to the next rule */ + } + else { + const auto &expect_colon_tok = expect_colon_block.get_token_or_empty(); + + if (expect_colon_tok.type != css_parser_token::token_type::colon_token) { + msg_debug_css("invalid rule, no colon after property"); + state = ignore_value; /* Ignore up to the next rule */ + } + else { + state = parse_value; + cur_rule = std::make_unique<css_rule>(cur_property); + } + } + } + else if (state == parse_value) { + /* Check semicolon */ + if (next_tok.is_token()) { + const auto &parser_tok = next_tok.get_token_or_empty(); + + if (parser_tok.type == css_parser_token::token_type::semicolon_token) { + ret.push_back(std::move(cur_rule)); + state = parse_property; + continue; + } + } + + auto maybe_value = css_value::from_css_block(next_tok); + + if (maybe_value) { + cur_rule->add_value(maybe_value.value()); + } + } + else { + /* Ignore all till ; */ + if (next_tok.is_token()) { + const auto &parser_tok = next_tok.get_token_or_empty(); + + if (parser_tok.type == css_parser_token::token_type::semicolon_token) { + state = parse_property; + } + } + } + break; + case css_consumed_block::parser_tag_type::css_function: + case css_consumed_block::parser_tag_type::css_function_arg: + if (state == parse_value) { + auto maybe_value = css_value::from_css_block(next_tok); + + if (maybe_value) { + cur_rule->add_value(maybe_value.value()); + } + } + break; + case css_consumed_block::parser_tag_type::css_eof_block: + default: + can_continue = false; + break; + } + } return ret; /* copy elision */ } diff --git a/src/libserver/css/css_rule.hxx b/src/libserver/css/css_rule.hxx index 725b6448b..929c5b263 100644 --- a/src/libserver/css/css_rule.hxx +++ b/src/libserver/css/css_rule.hxx @@ -20,7 +20,7 @@ #include "css_value.hxx" #include "css_property.hxx" -#include "css_tokeniser.hxx" +#include "css_parser.hxx" #include <vector> #include <memory> @@ -38,7 +38,7 @@ public: css_rule(css_rule &&other) = default; explicit css_rule(css_property &&prop, css_values_vec &&values) : prop(prop), values(std::forward<css_values_vec>(values)) {} - explicit css_rule(css_property &&prop) : prop(prop), values{} {} + explicit css_rule(const css_property &prop) : prop(prop), values{} {} /* Methods */ void add_value(std::unique_ptr<css_value> &&value) { values.emplace_back(std::forward<std::unique_ptr<css_value>>(value)); @@ -53,7 +53,7 @@ public: using declarations_vec = std::vector<std::unique_ptr<css_rule>>; auto process_declaration_tokens(rspamd_mempool_t *pool, - const tokeniser_gen_functor &next_token_functor) + const blocks_gen_functor &next_token_functor) -> declarations_vec; } diff --git a/src/libserver/css/css_selector.cxx b/src/libserver/css/css_selector.cxx index 1d6f727ea..2f1f29aca 100644 --- a/src/libserver/css/css_selector.cxx +++ b/src/libserver/css/css_selector.cxx @@ -19,7 +19,7 @@ namespace rspamd::css { auto process_selector_tokens(rspamd_mempool_t *pool, - const tokeniser_gen_functor &next_token_functor) + const blocks_gen_functor &next_token_functor) -> selectors_vec { selectors_vec ret; diff --git a/src/libserver/css/css_selector.hxx b/src/libserver/css/css_selector.hxx index 59f7bbbe7..a701e20f6 100644 --- a/src/libserver/css/css_selector.hxx +++ b/src/libserver/css/css_selector.hxx @@ -25,8 +25,9 @@ #include <vector> #include <functional> #include <memory> + #include "parse_error.hxx" -#include "css_tokeniser.hxx" +#include "css_parser.hxx" #include "html_tags.h" namespace rspamd::css { @@ -65,7 +66,7 @@ using selectors_vec = std::vector<std::unique_ptr<css_selector>>; * Consume selectors token and split them to the list of selectors */ auto process_selector_tokens(rspamd_mempool_t *pool, - const tokeniser_gen_functor &next_token_functor) + const blocks_gen_functor &next_token_functor) -> selectors_vec; } diff --git a/src/libserver/css/css_tokeniser.cxx b/src/libserver/css/css_tokeniser.cxx index c875666da..be2b4f802 100644 --- a/src/libserver/css/css_tokeniser.cxx +++ b/src/libserver/css/css_tokeniser.cxx @@ -597,6 +597,9 @@ auto css_tokeniser::next_token(void) -> struct css_parser_token case ';': offset = i + 1; return make_token<css_parser_token::token_type::semicolon_token>(); + case ':': + offset = i + 1; + return make_token<css_parser_token::token_type::colon_token>(); case '<': /* Maybe an xml like comment */ if (i + 3 < input.size () && input[i + 1] == '!' diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx index e3ba47437..b39e8431c 100644 --- a/src/libserver/css/css_tokeniser.hxx +++ b/src/libserver/css/css_tokeniser.hxx @@ -95,6 +95,13 @@ struct css_parser_token { css_parser_token(css_parser_token &&other) = default; auto operator=(css_parser_token &&other) -> css_parser_token& = default; auto adjust_dim(const css_parser_token &dim_token) -> bool; + auto get_string_or_default(const std::string_view &def) const -> std::string_view { + if (value.index() == 0) { + return std::get<std::string_view>(value); + } + + return def; + } /* Debugging routines */ constexpr auto get_token_type() -> const char *; @@ -139,8 +146,6 @@ private: auto consume_ident() -> struct css_parser_token; }; -using tokeniser_gen_functor = std::function<const css_parser_token &(void)>; - } diff --git a/src/libserver/css/css_value.cxx b/src/libserver/css/css_value.cxx index af4691daf..eb893f21c 100644 --- a/src/libserver/css/css_value.cxx +++ b/src/libserver/css/css_value.cxx @@ -18,8 +18,8 @@ namespace rspamd::css { -tl::expected<css_value,css_parse_error> css_value::from_bytes (const char *input, - size_t inlen) +tl::expected<css_value,css_parse_error> +css_value::from_css_block(const css_consumed_block &bl) { return tl::unexpected{css_parse_error(css_parse_error_type::PARSE_ERROR_NYI)}; } diff --git a/src/libserver/css/css_value.hxx b/src/libserver/css/css_value.hxx index 302eb945b..9290dc8f4 100644 --- a/src/libserver/css/css_value.hxx +++ b/src/libserver/css/css_value.hxx @@ -24,6 +24,7 @@ #include <variant> #include <optional> #include "parse_error.hxx" +#include "css_parser.hxx" #include "contrib/expected/expected.hpp" namespace rspamd::css { @@ -109,8 +110,7 @@ struct css_value { return (type != css_value_type::CSS_VALUE_NYI); } - static tl::expected<css_value,css_parse_error> from_bytes (const char *input, - size_t inlen); + static tl::expected<css_value,css_parse_error> from_css_block(const css_consumed_block &bl); }; } |