From 66f078e7f124c357eae417c01837e25761701128 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 17 Feb 2021 15:56:58 +0000 Subject: [PATCH] [Project] Css: Start semantic parsing for rules --- src/libserver/css/css_parser.cxx | 47 +++++++++++++++++++++++++++++- src/libserver/css/css_selector.hxx | 7 +++-- test/lua/unit/css.lua | 6 ++-- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx index 084d1b50a..a0a9d8847 100644 --- a/src/libserver/css/css_parser.cxx +++ b/src/libserver/css/css_parser.cxx @@ -22,6 +22,7 @@ namespace rspamd::css { +struct css_consumed_block; /* * Represents a consumed token by a parser */ @@ -33,7 +34,8 @@ struct css_consumed_block { css_simple_block, css_function, css_function_arg, - css_component + css_component, + css_selector, }; using consumed_block_ptr = std::unique_ptr; @@ -81,6 +83,17 @@ struct css_consumed_block { content = std::move(tok); } + /* Empty blocks used to avoid type checks in loops */ + const inline static std::vector empty_block_vec{}; + + auto get_blocks_or_empty() const -> const std::vector& { + if (content.index() == 1) { + return std::get>(content); + } + + return empty_block_vec; + } + auto token_type_str(void) const -> const char * { const auto *ret = ""; @@ -107,6 +120,9 @@ struct css_consumed_block { case parser_tag_type::css_component: ret = "component"; break; + case parser_tag_type::css_selector: + ret = "selector"; + break; } return ret; @@ -570,6 +586,35 @@ bool css_parser::consume_input(const std::string_view &sv) } + const auto &rules = consumed_blocks->get_blocks_or_empty(); + + for (auto &&rule : rules) { + /* + * For now, we do not need any of the at rules, so we can safely ignore them + */ + auto &&children = rule->get_blocks_or_empty(); + + if (children.size() > 1 && + children[0]->tag == css_consumed_block::parser_tag_type::css_component) { + auto simple_block = std::find_if(children.begin(), children.end(), + [](auto &bl) { + return bl->tag == css_consumed_block::parser_tag_type::css_simple_block; + }); + + if (simple_block != children.end()) { + /* + * We have a component and a simple block, + * so we can parse a declaration + */ + + /* First, tag all components as preamble */ + for (auto it = children.begin(); it != simple_block; ++it) { + (*it)->tag = css_consumed_block::parser_tag_type::css_selector; + } + } + } + } + auto debug_str = consumed_blocks->debug_str(); msg_debug_css("consumed css: {%*s}", (int)debug_str.size(), debug_str.data()); diff --git a/src/libserver/css/css_selector.hxx b/src/libserver/css/css_selector.hxx index c9f3046d5..8611630fd 100644 --- a/src/libserver/css/css_selector.hxx +++ b/src/libserver/css/css_selector.hxx @@ -24,6 +24,7 @@ #include #include "contrib/expected/expected.hpp" #include "parse_error.hxx" +#include "css_tokeniser.hxx" #include "html_tags.h" namespace rspamd::css { @@ -41,14 +42,14 @@ struct css_selector { selector_type type; std::variant value; - constexpr std::optional to_tag (void) const { + auto to_tag(void) const -> std::optional { if (type == selector_type::SELECTOR_ELEMENT) { return std::get(value); } return std::nullopt; } - constexpr std::optional to_string (void) const { + auto to_string(void) const -> std::optional { if (type == selector_type::SELECTOR_ELEMENT) { return std::string_view(std::get(value)); } @@ -59,6 +60,8 @@ struct css_selector { size_t inlen); }; + + } #endif //RSPAMD_CSS_SELECTOR_HXX diff --git a/test/lua/unit/css.lua b/test/lua/unit/css.lua index e040a6fd3..a5a8f533f 100644 --- a/test/lua/unit/css.lua +++ b/test/lua/unit/css.lua @@ -50,7 +50,7 @@ p { p:first-child { color: blue; } -a[target=_blank] { +a[target=_blank] #id{ background-color: yellow; } * { @@ -75,7 +75,7 @@ body { } /* Style the topnav links */ -.topnav a { +.topnav a{ float: left; display: block; color: #f2f2f2; @@ -85,7 +85,7 @@ body { } /* Clear floats after the columns */ -.row:after { +.row:after{ content: ""; display: table; clear: both; -- 2.39.5