From: Vsevolod Stakhov
Date: Tue, 15 Jun 2021 13:55:02 +0000 (+0100)
Subject: [Project] Css: Implement simple css selectors lookup
X-Git-Tag: 3.0~299
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=ef252b1d2cb9fe410392f2dae52f4202392ff12d;p=rspamd.git

[Project] Css: Implement simple css selectors lookup
---

Review notes (commentary only, ignored when the patch is applied):

 * check_tag_block: the class lookup result was compared with
   id_selectors.end(); it is now compared with class_selectors.end().
 * check_tag_block: the tag lookup searched class_selectors and compared
   the result with id_selectors.end(); it now uses tags_selector for both.
 * check_tag_block: sv_split stored the trailing token twice when no further
   delimiter was found; the redundant second store has been removed.
 * Template argument lists and the `&param` reference were lost when this
   patch was extracted and have been restored from usage. The alias
   definitions, the to_string return type and the std::uint64_t cast are a
   best-effort reconstruction - TODO confirm against the upstream commit.
 * The third trailing context line of the last css.cxx hunk could not be
   recovered, so that hunk carries two trailing context lines. Hunk line
   counts were recomputed; the post-image blob ids on the `index` lines
   still refer to the original commit.

diff --git a/src/libserver/css/css.cxx b/src/libserver/css/css.cxx
index 12f7753c7..c68148341 100644
--- a/src/libserver/css/css.cxx
+++ b/src/libserver/css/css.cxx
@@ -17,6 +17,9 @@
 #include "css.hxx"
 #include "contrib/robin-hood/robin_hood.h"
 #include "css_parser.hxx"
+#include "libserver/html/html_tag.hxx"
+#include "libserver/html/html_block.hxx"
+
 /* Keep unit tests implementation here (it'll possibly be moved outside one day) */
 #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
 #define DOCTEST_CONFIG_IMPLEMENT
@@ -28,8 +31,11 @@ INIT_LOG_MODULE_PUBLIC(css);
 
 class css_style_sheet::impl {
 public:
+	using sel_shared_hash = smart_ptr_hash<css_selector>;
+	using sel_shared_eq = smart_ptr_equal<css_selector>;
 	using selector_ptr = std::unique_ptr<css_selector>;
-	using selectors_hash = robin_hood::unordered_flat_map<selector_ptr, css_declarations_block_ptr>;
+	using selectors_hash = robin_hood::unordered_flat_map<selector_ptr, css_declarations_block_ptr,
+			sel_shared_hash, sel_shared_eq>;
 	using universal_selector_t = std::pair<selector_ptr, css_declarations_block_ptr>;
 	selectors_hash tags_selector;
 	selectors_hash class_selectors;
@@ -96,5 +102,121 @@ css_style_sheet::add_selector_rule(std::unique_ptr<css_selector> &&selector,
 	}
 }
 
+/**
+ * Finds the style block that applies to an HTML tag using simple selectors,
+ * checked in this order: id, classes, tag name, universal selector.
+ * The first matching block becomes the result; every later match is
+ * merged into it with `propagate_block`.
+ * @param tag HTML tag to check (may be nullptr)
+ * @return resulting block or nullptr if `tag` is nullptr or nothing matched
+ */
+auto
+css_style_sheet::check_tag_block(const rspamd::html::html_tag *tag) ->
+		rspamd::html::html_block *
+{
+	std::optional<std::string_view> id_comp, class_comp;
+	rspamd::html::html_block *res = nullptr;
+
+	if (!tag) {
+		return nullptr;
+	}
+
+	/* First, find id in a tag and a class */
+	for (const auto &param : tag->parameters) {
+		if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_ID) {
+			id_comp = param.value;
+		}
+		else if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_CLASS) {
+			class_comp = param.value;
+		}
+	}
+
+	/* ID part */
+	if (id_comp && !pimpl->id_selectors.empty()) {
+		auto found_id_sel = pimpl->id_selectors.find(css_selector{id_comp.value()});
+
+		if (found_id_sel != pimpl->id_selectors.end()) {
+			const auto &decl = *(found_id_sel->second);
+			res = decl.compile_to_block(pool);
+		}
+	}
+
+	/* Class part */
+	if (class_comp && !pimpl->class_selectors.empty()) {
+		/* Splits a string view into non-empty tokens separated by any of delims */
+		auto sv_split = [](auto strv, std::string_view delims = " ") -> std::vector<std::string_view> {
+			std::vector<std::string_view> ret;
+			std::size_t start = 0;
+
+			while (start < strv.size()) {
+				const auto last = strv.find_first_of(delims, start);
+				if (start != last) {
+					ret.emplace_back(strv.substr(start, last - start));
+				}
+
+				if (last == std::string_view::npos) {
+					/* The trailing token has already been stored above */
+					break;
+				}
+
+				start = last + 1;
+			}
+
+			return ret;
+		};
+
+		auto elts = sv_split(class_comp.value());
+
+		for (const auto &e : elts) {
+			auto found_class_sel = pimpl->class_selectors.find(
+					css_selector{e, css_selector::selector_type::SELECTOR_CLASS});
+
+			if (found_class_sel != pimpl->class_selectors.end()) {
+				const auto &decl = *(found_class_sel->second);
+				auto *tmp = decl.compile_to_block(pool);
+
+				if (res == nullptr) {
+					res = tmp;
+				}
+				else {
+					res->propagate_block(*tmp);
+				}
+			}
+		}
+	}
+
+	/* Tags part */
+	if (!pimpl->tags_selector.empty()) {
+		auto found_tag_sel = pimpl->tags_selector.find(
+				css_selector{static_cast<tag_id_t>(tag->id)});
+
+		if (found_tag_sel != pimpl->tags_selector.end()) {
+			const auto &decl = *(found_tag_sel->second);
+			auto *tmp = decl.compile_to_block(pool);
+
+			if (res == nullptr) {
+				res = tmp;
+			}
+			else {
+				res->propagate_block(*tmp);
+			}
+		}
+	}
+
+	/* Finally, universal selector */
+	if (pimpl->universal_selector) {
+		auto *tmp = pimpl->universal_selector->second->compile_to_block(pool);
+
+		if (res == nullptr) {
+			res = tmp;
+		}
+		else {
+			res->propagate_block(*tmp);
+		}
+	}
+
+	return res;
+}
+
 auto
 css_parse_style(rspamd_mempool_t *pool, std::string_view input,
diff --git a/src/libserver/css/css.hxx b/src/libserver/css/css.hxx
index a169a1052..21114cc86 100644
--- a/src/libserver/css/css.hxx
+++ b/src/libserver/css/css.hxx
@@ -24,6 +24,12 @@
 #include "css_rule.hxx"
 #include "css_selector.hxx"
 
+namespace rspamd::html {
+/* Forward declaration */
+struct html_tag;
+struct html_block;
+}
+
 namespace rspamd::css {
 
 extern unsigned int rspamd_css_log_id;
@@ -43,6 +49,14 @@ public:
 	~css_style_sheet(); /* must be declared separately due to pimpl */
 	auto add_selector_rule(std::unique_ptr<css_selector> &&selector,
 						   css_declarations_block_ptr decls) -> void;
+
+	/**
+	 * Looks up the style block matching a tag (id, classes, tag name, universal)
+	 * @param tag HTML tag to check (may be nullptr)
+	 * @return matching block or nullptr if nothing matched
+	 */
+	auto check_tag_block(const rspamd::html::html_tag *tag) ->
+		rspamd::html::html_block *;
 private:
 	class impl;
 	rspamd_mempool_t *pool;
diff --git a/src/libserver/css/css_rule.hxx b/src/libserver/css/css_rule.hxx
index b29bf298f..8de6c7891 100644
--- a/src/libserver/css/css_rule.hxx
+++ b/src/libserver/css/css_rule.hxx
@@ -87,8 +87,8 @@ namespace rspamd::css {
 class css_declarations_block {
 public:
 	using rule_shared_ptr = std::shared_ptr<css_rule>;
-	using rule_shared_hash = shared_ptr_hash<css_rule>;
-	using rule_shared_eq = shared_ptr_equal<css_rule>;
+	using rule_shared_hash = smart_ptr_hash<css_rule>;
+	using rule_shared_eq = smart_ptr_equal<css_rule>;
 	enum class merge_type {
 		merge_duplicate,
 		merge_parent,
diff --git a/src/libserver/css/css_selector.hxx b/src/libserver/css/css_selector.hxx
index 633b12c70..1e8145732 100644
--- a/src/libserver/css/css_selector.hxx
+++ b/src/libserver/css/css_selector.hxx
@@ -29,6 +29,7 @@
 #include "parse_error.hxx"
 #include "css_parser.hxx"
 #include "libserver/html/html_tags.h"
+#include "libcryptobox/cryptobox.h"
 
 namespace rspamd::css {
 
@@ -67,13 +68,26 @@ struct css_selector {
 	}
 
 	auto to_string(void) const -> std::optional<const std::string_view> {
-		if (type == selector_type::SELECTOR_ELEMENT) {
+		if (type != selector_type::SELECTOR_ELEMENT) {
 			return std::string_view(std::get<std::string_view>(value));
 		}
 		return std::nullopt;
 	};
 
 	explicit css_selector(selector_type t) : type(t) {}
+	/* Element selector for a specific tag id */
+	explicit css_selector(tag_id_t t) : type(selector_type::SELECTOR_ELEMENT) {
+		value = t;
+	}
+	/* String based selector, an id selector unless another type is given */
+	explicit css_selector(const std::string_view &st, selector_type t = selector_type::SELECTOR_ID) : type(t) {
+		value = st;
+	}
+
+	/* Selectors are equal if both their type and value match */
+	auto operator ==(const css_selector &other) const -> bool {
+		return type == other.type && value == other.value;
+	}
 
 	auto debug_str(void) const -> std::string;
 };
@@ -90,4 +104,26 @@ auto process_selector_tokens(rspamd_mempool_t *pool,
 
 }
 
+/* Selectors hashing */
+namespace std {
+/**
+ * Hashes element selectors by their tag id and all other selectors by
+ * their string value
+ */
+template<>
+class hash<rspamd::css::css_selector> {
+public:
+	auto operator() (const rspamd::css::css_selector &sel) const -> auto {
+		if (sel.type == rspamd::css::css_selector::selector_type::SELECTOR_ELEMENT) {
+			return static_cast<std::uint64_t>(std::get<tag_id_t>(sel.value));
+		}
+		else {
+			const auto &sv = std::get<std::string_view>(sel.value);
+
+			return rspamd_cryptobox_fast_hash(sv.data(), sv.size(), 0xdeadbabe);
+		}
+	}
+};
+}
+
 #endif //RSPAMD_CSS_SELECTOR_HXX