source.dussan.org Git - rspamd.git/commitdiff
[Project] Css: Implement simple css selectors lookup
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 15 Jun 2021 13:55:02 +0000 (14:55 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 15 Jun 2021 13:55:02 +0000 (14:55 +0100)
src/libserver/css/css.cxx
src/libserver/css/css.hxx
src/libserver/css/css_rule.hxx
src/libserver/css/css_selector.hxx

index 12f7753c7f6df3a6e4c4837f32ceb95c7098c011..c6814834187cd9eeaf997d79790a784bc781e316 100644 (file)
@@ -17,6 +17,9 @@
 #include "css.hxx"
 #include "contrib/robin-hood/robin_hood.h"
 #include "css_parser.hxx"
+#include "libserver/html/html_tag.hxx"
+#include "libserver/html/html_block.hxx"
+
 /* Keep unit tests implementation here (it'll possibly be moved outside one day) */
 #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
 #define DOCTEST_CONFIG_IMPLEMENT
@@ -28,8 +31,11 @@ INIT_LOG_MODULE_PUBLIC(css);
 
 class css_style_sheet::impl {
 public:
+       using sel_shared_hash = smart_ptr_hash<css_selector>;
+       using sel_shared_eq = smart_ptr_equal<css_selector>;
        using selector_ptr = std::unique_ptr<css_selector>;
-       using selectors_hash = robin_hood::unordered_flat_map<selector_ptr, css_declarations_block_ptr>;
+       using selectors_hash = robin_hood::unordered_flat_map<selector_ptr, css_declarations_block_ptr,
+                       sel_shared_hash, sel_shared_eq>;
        using universal_selector_t = std::pair<selector_ptr, css_declarations_block_ptr>;
        selectors_hash tags_selector;
        selectors_hash class_selectors;
@@ -96,6 +102,115 @@ css_style_sheet::add_selector_rule(std::unique_ptr<css_selector> &&selector,
        }
 }
 
+/*
+ * Builds the html block for a tag from the rules stored in this stylesheet.
+ *
+ * The id and class attributes are taken from tag->parameters (if an attribute
+ * is repeated, the last value wins). Selectors are then looked up in this
+ * order: id, each space separated class, tag id, universal selector. The
+ * first compiled block becomes the result and every later block is handed
+ * to propagate_block() on it.
+ *
+ * tag: tag to match, may be nullptr
+ * Returns the resulting block, or nullptr if tag is nullptr or nothing
+ * matched.
+ */
+auto
+css_style_sheet::check_tag_block(const rspamd::html::html_tag *tag) ->
+               rspamd::html::html_block *
+{
+       std::optional<std::string_view> id_comp, class_comp;
+       rspamd::html::html_block *res = nullptr;
+
+       if (!tag) {
+               return nullptr;
+       }
+
+       /* First, find id in a tag and a class */
+       for (const auto &param : tag->parameters) {
+               if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_ID) {
+                       id_comp = param.value;
+               }
+               else if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_CLASS) {
+                       class_comp = param.value;
+               }
+       }
+
+       /* Adopts the first compiled block, propagates any further one into it */
+       auto merge_block = [&res](rspamd::html::html_block *blk) -> void {
+               if (res == nullptr) {
+                       res = blk;
+               }
+               else {
+                       res->propagate_block(*blk);
+               }
+       };
+
+       /* ID part */
+       if (id_comp && !pimpl->id_selectors.empty()) {
+               auto found_id_sel = pimpl->id_selectors.find(css_selector{id_comp.value()});
+
+               if (found_id_sel != pimpl->id_selectors.end()) {
+                       merge_block(found_id_sel->second->compile_to_block(pool));
+               }
+       }
+
+       /* Class part */
+       if (class_comp && !pimpl->class_selectors.empty()) {
+               /* Splits strv on any character of delims, skipping empty tokens */
+               auto sv_split = [](auto strv, std::string_view delims = " ") -> std::vector<std::string_view> {
+                       std::vector<decltype(strv)> ret;
+                       std::size_t start = 0;
+
+                       while (start < strv.size()) {
+                               const auto last = strv.find_first_of(delims, start);
+                               if (start != last) {
+                                       /* When last is npos, substr() clamps and yields the whole tail */
+                                       ret.emplace_back(strv.substr(start, last - start));
+                               }
+
+                               if (last == std::string_view::npos) {
+                                       /* The tail has been stored above, it must not be added twice */
+                                       break;
+                               }
+
+                               start = last + 1;
+                       }
+
+                       return ret;
+               };
+
+               auto elts = sv_split(class_comp.value());
+
+               for (const auto &e : elts) {
+                       auto found_class_sel = pimpl->class_selectors.find(
+                                       css_selector{e, css_selector::selector_type::SELECTOR_CLASS});
+
+                       /* The iterator is only comparable with end() of the map that was searched */
+                       if (found_class_sel != pimpl->class_selectors.end()) {
+                               merge_block(found_class_sel->second->compile_to_block(pool));
+                       }
+               }
+       }
+
+       /* Tags part */
+       if (!pimpl->tags_selector.empty()) {
+               auto found_tag_sel = pimpl->tags_selector.find(
+                               css_selector{static_cast<tag_id_t>(tag->id)});
+
+               if (found_tag_sel != pimpl->tags_selector.end()) {
+                       merge_block(found_tag_sel->second->compile_to_block(pool));
+               }
+       }
+
+       /* Finally, universal selector */
+       if (pimpl->universal_selector) {
+               merge_block(pimpl->universal_selector->second->compile_to_block(pool));
+       }
+
+       return res;
+}
+
 auto
 css_parse_style(rspamd_mempool_t *pool,
                                         std::string_view input,
index a169a10529d70b616455afb86a7cfb939ff8a4c7..21114cc86019249ad6cc5c4cf3bead8b68591f74 100644 (file)
 #include "css_rule.hxx"
 #include "css_selector.hxx"
 
+namespace rspamd::html {
+/* Declared only: check_tag_block() refers to these types through pointers */
+struct html_block;
+struct html_tag;
+}
+
 namespace rspamd::css {
 
 extern unsigned int rspamd_css_log_id;
@@ -43,6 +49,9 @@ public:
        ~css_style_sheet(); /* must be declared separately due to pimpl */
        auto add_selector_rule(std::unique_ptr<css_selector> &&selector,
                                                   css_declarations_block_ptr decls) -> void;
+
+       auto check_tag_block(const rspamd::html::html_tag *tag) ->
+               rspamd::html::html_block *;
 private:
        class impl;
        rspamd_mempool_t *pool;
index b29bf298fb83ce3cde8c16bce3a7c8c648193a64..8de6c789125f354e8d82343c4f916fd7b8767e76 100644 (file)
@@ -87,8 +87,8 @@ namespace rspamd::css {
 class css_declarations_block {
 public:
        using rule_shared_ptr = std::shared_ptr<css_rule>;
-       using rule_shared_hash = shared_ptr_hash<css_rule>;
-       using rule_shared_eq = shared_ptr_equal<css_rule>;
+       using rule_shared_hash = smart_ptr_hash<css_rule>;
+       using rule_shared_eq = smart_ptr_equal<css_rule>;
        enum class merge_type {
                merge_duplicate,
                merge_parent,
index 633b12c707efd0ea18e63748e6424a35081dd1fb..1e814573269661476732df82ca779e2083123983 100644 (file)
@@ -29,6 +29,7 @@
 #include "parse_error.hxx"
 #include "css_parser.hxx"
 #include "libserver/html/html_tags.h"
+#include "libcryptobox/cryptobox.h"
 
 namespace rspamd::css {
 
@@ -67,13 +68,23 @@ struct css_selector {
        }
 
        auto to_string(void) const -> std::optional<const std::string_view> {
-               if (type == selector_type::SELECTOR_ELEMENT) {
+               if (type != selector_type::SELECTOR_ELEMENT) { /* element selectors store a tag id, so they have no string form */
                        return std::string_view(std::get<std::string_view>(value));
                }
                return std::nullopt;
        };
 
        explicit css_selector(selector_type t) : type(t) {}
+       explicit css_selector(tag_id_t t) : type(selector_type::SELECTOR_ELEMENT) { /* element selector holding the tag id t */
+               value = t;
+       }
+       explicit css_selector(const std::string_view &st, selector_type t = selector_type::SELECTOR_ID) : type(t) { /* string valued selector (an id selector by default); stores the view st itself, not a copy of its characters */
+               value = st;
+       }
+
+       auto operator ==(const css_selector &other) const -> bool { /* equal only when both the type and the stored value match */
+               return type == other.type && value == other.value;
+       }
 
        auto debug_str(void) const -> std::string;
 };
@@ -90,4 +101,22 @@ auto process_selector_tokens(rspamd_mempool_t *pool,
 
 }
 
+/* Selectors hashing: std::hash specialisation for css_selector values */
+namespace std {
+template<>
+class hash<rspamd::css::css_selector> {
+public:
+       /* Explicit result type: with a deduced `auto` both returns would have to yield the very same type */
+       auto operator() (const rspamd::css::css_selector &sel) const -> std::size_t {
+               if (sel.type == rspamd::css::css_selector::selector_type::SELECTOR_ELEMENT) {
+                       return static_cast<std::size_t>(std::get<tag_id_t>(sel.value));
+               }
+
+               /* Every other selector type is hashed by its string value */
+               const auto &sv = std::get<std::string_view>(sel.value);
+               return rspamd_cryptobox_fast_hash(sv.data(), sv.size(), 0xdeadbabe);
+       }
+};
+}
+
 #endif //RSPAMD_CSS_SELECTOR_HXX