aboutsummaryrefslogtreecommitdiffstats
path: root/src/libserver/html/html.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'src/libserver/html/html.cxx')
-rw-r--r-- src/libserver/html/html.cxx 1017
1 files changed, 831 insertions, 186 deletions
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 93d1fdf91..78a6a975c 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -39,6 +39,7 @@
#include "contrib/frozen/include/frozen/string.h"
#include "contrib/fmt/include/fmt/core.h"
+#include <functional>
#include <unicode/uversion.h>
namespace rspamd::html {
@@ -47,23 +48,88 @@ static const unsigned int max_tags = 8192; /* Ignore tags if this maximum is rea
static const html_tags_storage html_tags_defs;
-auto html_components_map = frozen::make_unordered_map<frozen::string, html_component_type>(
+/*
+ * Attribute name -> html_component_enum_type lookup table, consulted by
+ * html_component_from_string() to pick the typed component to build.
+ * Several names may share one enumerator ("href" and "action" both map to
+ * RSPAMD_HTML_COMPONENT_HREF), while "src" has an enumerator of its own.
+ */
+auto html_components_map = frozen::make_unordered_map<frozen::string, html_component_enum_type>(
{
- {"name", html_component_type::RSPAMD_HTML_COMPONENT_NAME},
- {"href", html_component_type::RSPAMD_HTML_COMPONENT_HREF},
- {"src", html_component_type::RSPAMD_HTML_COMPONENT_HREF},
- {"action", html_component_type::RSPAMD_HTML_COMPONENT_HREF},
- {"color", html_component_type::RSPAMD_HTML_COMPONENT_COLOR},
- {"bgcolor", html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR},
- {"style", html_component_type::RSPAMD_HTML_COMPONENT_STYLE},
- {"class", html_component_type::RSPAMD_HTML_COMPONENT_CLASS},
- {"width", html_component_type::RSPAMD_HTML_COMPONENT_WIDTH},
- {"height", html_component_type::RSPAMD_HTML_COMPONENT_HEIGHT},
- {"size", html_component_type::RSPAMD_HTML_COMPONENT_SIZE},
- {"rel", html_component_type::RSPAMD_HTML_COMPONENT_REL},
- {"alt", html_component_type::RSPAMD_HTML_COMPONENT_ALT},
- {"id", html_component_type::RSPAMD_HTML_COMPONENT_ID},
- {"hidden", html_component_type::RSPAMD_HTML_COMPONENT_HIDDEN},
+ {"name", html_component_enum_type::RSPAMD_HTML_COMPONENT_NAME},
+ {"href", html_component_enum_type::RSPAMD_HTML_COMPONENT_HREF},
+ {"src", html_component_enum_type::RSPAMD_HTML_COMPONENT_SRC},
+ {"action", html_component_enum_type::RSPAMD_HTML_COMPONENT_HREF}, // shares the HREF enumerator with "href"
+ {"color", html_component_enum_type::RSPAMD_HTML_COMPONENT_COLOR},
+ {"bgcolor", html_component_enum_type::RSPAMD_HTML_COMPONENT_BGCOLOR},
+ {"style", html_component_enum_type::RSPAMD_HTML_COMPONENT_STYLE},
+ {"class", html_component_enum_type::RSPAMD_HTML_COMPONENT_CLASS},
+ {"width", html_component_enum_type::RSPAMD_HTML_COMPONENT_WIDTH},
+ {"height", html_component_enum_type::RSPAMD_HTML_COMPONENT_HEIGHT},
+ {"size", html_component_enum_type::RSPAMD_HTML_COMPONENT_SIZE},
+ {"rel", html_component_enum_type::RSPAMD_HTML_COMPONENT_REL},
+ {"alt", html_component_enum_type::RSPAMD_HTML_COMPONENT_ALT},
+ {"id", html_component_enum_type::RSPAMD_HTML_COMPONENT_ID},
+ {"hidden", html_component_enum_type::RSPAMD_HTML_COMPONENT_HIDDEN},
+ // Typography
+ {"font-family", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_FAMILY},
+ {"font-size", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_SIZE},
+ {"font-weight", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_WEIGHT},
+ {"font-style", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_STYLE},
+ {"text-align", html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_ALIGN},
+ {"text-decoration", html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_DECORATION},
+ {"line-height", html_component_enum_type::RSPAMD_HTML_COMPONENT_LINE_HEIGHT},
+ // Layout & positioning
+ {"margin", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN},
+ {"margin-top", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_TOP},
+ {"margin-bottom", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_BOTTOM},
+ {"margin-left", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_LEFT},
+ {"margin-right", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_RIGHT},
+ {"padding", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING},
+ {"padding-top", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_TOP},
+ {"padding-bottom", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_BOTTOM},
+ {"padding-left", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_LEFT},
+ {"padding-right", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_RIGHT},
+ {"border", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER},
+ {"border-color", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_COLOR},
+ {"border-width", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_WIDTH},
+ {"border-style", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_STYLE},
+ // Display & visibility
+ {"display", html_component_enum_type::RSPAMD_HTML_COMPONENT_DISPLAY},
+ {"visibility", html_component_enum_type::RSPAMD_HTML_COMPONENT_VISIBILITY},
+ {"opacity", html_component_enum_type::RSPAMD_HTML_COMPONENT_OPACITY},
+ // Dimensions
+ {"min-width", html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_WIDTH},
+ {"max-width", html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_WIDTH},
+ {"min-height", html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_HEIGHT},
+ {"max-height", html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_HEIGHT},
+ // Table attributes
+ {"cellpadding", html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLPADDING},
+ {"cellspacing", html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLSPACING},
+ {"valign", html_component_enum_type::RSPAMD_HTML_COMPONENT_VALIGN},
+ {"align", html_component_enum_type::RSPAMD_HTML_COMPONENT_ALIGN},
+ // Form attributes
+ {"type", html_component_enum_type::RSPAMD_HTML_COMPONENT_TYPE},
+ {"value", html_component_enum_type::RSPAMD_HTML_COMPONENT_VALUE},
+ {"placeholder", html_component_enum_type::RSPAMD_HTML_COMPONENT_PLACEHOLDER},
+ {"disabled", html_component_enum_type::RSPAMD_HTML_COMPONENT_DISABLED},
+ {"readonly", html_component_enum_type::RSPAMD_HTML_COMPONENT_READONLY},
+ {"checked", html_component_enum_type::RSPAMD_HTML_COMPONENT_CHECKED},
+ {"selected", html_component_enum_type::RSPAMD_HTML_COMPONENT_SELECTED},
+ // Link & media
+ {"target", html_component_enum_type::RSPAMD_HTML_COMPONENT_TARGET},
+ {"title", html_component_enum_type::RSPAMD_HTML_COMPONENT_TITLE},
+ // Meta & document
+ {"charset", html_component_enum_type::RSPAMD_HTML_COMPONENT_CHARSET},
+ {"content", html_component_enum_type::RSPAMD_HTML_COMPONENT_CONTENT},
+ {"http-equiv", html_component_enum_type::RSPAMD_HTML_COMPONENT_HTTP_EQUIV},
+ // Accessibility
+ {"role", html_component_enum_type::RSPAMD_HTML_COMPONENT_ROLE},
+ {"tabindex", html_component_enum_type::RSPAMD_HTML_COMPONENT_TABINDEX},
+ // Background
+ {"background", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND},
+ {"background-image", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_IMAGE},
+ {"background-color", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_COLOR},
+ {"background-repeat", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_REPEAT},
+ {"background-position", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_POSITION},
+ // Email-specific tracking
+ {"data-track", html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_TRACK},
+ {"data-id", html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_ID},
+ {"data-url", html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_URL},
});
#define msg_debug_html(...) rspamd_conditional_debug_fast(NULL, NULL, \
@@ -199,18 +265,608 @@ html_check_balance(struct html_content *hc,
return nullptr;
}
-auto html_component_from_string(const std::string_view &st) -> std::optional<html_component_type>
+/*
+ * Builds the typed component for one attribute.
+ *
+ * `name` is looked up in html_components_map. On a hit, the matching
+ * html_component_* struct is returned, constructed from `value`; the
+ * presence-only attributes (hidden, disabled, readonly, checked, selected)
+ * are constructed without the value. A name that is not in the map, or an
+ * enumerator that has no case in the switch, yields html_component_unknown
+ * carrying both the name and the value.
+ *
+ * Both arguments are string views: the function itself copies nothing.
+ */
+auto html_component_from_string(std::string_view name, std::string_view value) -> html_tag_component
{
- auto known_component_it = html_components_map.find(st);
+ auto known_component_it = html_components_map.find(name);
if (known_component_it != html_components_map.end()) {
- return known_component_it->second;
+ switch (known_component_it->second) {
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_NAME:
+ return html_component_name{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_HREF:
+ return html_component_href{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_COLOR:
+ return html_component_color{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BGCOLOR:
+ return html_component_bgcolor{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_STYLE:
+ return html_component_style{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_CLASS:
+ return html_component_class{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_WIDTH:
+ return html_component_width{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_HEIGHT:
+ return html_component_height{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_SIZE:
+ return html_component_size{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_REL:
+ return html_component_rel{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_ALT:
+ return html_component_alt{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_ID:
+ return html_component_id{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_HIDDEN:
+ return html_component_hidden{}; // presence-only: constructed without the value
+ // Typography
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_FAMILY:
+ return html_component_font_family{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_SIZE:
+ return html_component_font_size{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_WEIGHT:
+ return html_component_font_weight{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_STYLE:
+ return html_component_font_style{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_ALIGN:
+ return html_component_text_align{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_DECORATION:
+ return html_component_text_decoration{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_LINE_HEIGHT:
+ return html_component_line_height{value};
+ // Layout
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN:
+ return html_component_margin{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_TOP:
+ return html_component_margin_top{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_BOTTOM:
+ return html_component_margin_bottom{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_LEFT:
+ return html_component_margin_left{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_RIGHT:
+ return html_component_margin_right{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING:
+ return html_component_padding{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_TOP:
+ return html_component_padding_top{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_BOTTOM:
+ return html_component_padding_bottom{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_LEFT:
+ return html_component_padding_left{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_RIGHT:
+ return html_component_padding_right{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER:
+ return html_component_border{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_COLOR:
+ return html_component_border_color{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_WIDTH:
+ return html_component_border_width{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_STYLE:
+ return html_component_border_style{value};
+ // Display
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_DISPLAY:
+ return html_component_display{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_VISIBILITY:
+ return html_component_visibility{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_OPACITY:
+ return html_component_opacity{value};
+ // Dimensions
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_WIDTH:
+ return html_component_min_width{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_WIDTH:
+ return html_component_max_width{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_HEIGHT:
+ return html_component_min_height{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_HEIGHT:
+ return html_component_max_height{value};
+ // Table
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLPADDING:
+ return html_component_cellpadding{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLSPACING:
+ return html_component_cellspacing{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_VALIGN:
+ return html_component_valign{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_ALIGN:
+ return html_component_align{value};
+ // Form
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_TYPE:
+ return html_component_type{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_VALUE:
+ return html_component_value{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_PLACEHOLDER:
+ return html_component_placeholder{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_DISABLED:
+ return html_component_disabled{}; // presence-only, as are readonly/checked/selected below
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_READONLY:
+ return html_component_readonly{};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_CHECKED:
+ return html_component_checked{};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_SELECTED:
+ return html_component_selected{};
+ // Link & media
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_TARGET:
+ return html_component_target{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_TITLE:
+ return html_component_title{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_SRC:
+ return html_component_src{value};
+ // Meta
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_CHARSET:
+ return html_component_charset{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_CONTENT:
+ return html_component_content{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_HTTP_EQUIV:
+ return html_component_http_equiv{value};
+ // Accessibility
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_ROLE:
+ return html_component_role{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_TABINDEX:
+ return html_component_tabindex{value};
+ // Background
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND:
+ return html_component_background{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_IMAGE:
+ return html_component_background_image{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_COLOR:
+ return html_component_background_color{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_REPEAT:
+ return html_component_background_repeat{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_POSITION:
+ return html_component_background_position{value};
+ // Email tracking
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_TRACK:
+ return html_component_data_track{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_ID:
+ return html_component_data_id{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_URL:
+ return html_component_data_url{value};
+ default: // enumerator found in the map but without a case above
+ return html_component_unknown{name, value};
+ }
}
else {
- return std::nullopt;
+ return html_component_unknown{name, value}; // name is not in html_components_map
}
}
+/* Signature shared by every extractor: the attribute's text, or std::nullopt when the tag does not yield one */
+using component_extractor_func = std::function<std::optional<std::string_view>(const html_tag *)>;
+
+/* Extractor for components that expose their text through a `value` member */
+template<class Component>
+static auto extract_component_value(const html_tag *tag) -> std::optional<std::string_view>
+{
+    if (auto comp = tag->find_component<Component>()) {
+        return comp.value()->value;
+    }
+    return std::nullopt;
+}
+
+/* Extractor for components that expose their text through get_string_value() */
+template<class Component>
+static auto extract_component_string(const html_tag *tag) -> std::optional<std::string_view>
+{
+    if (auto comp = tag->find_component<Component>()) {
+        return comp.value()->get_string_value();
+    }
+    return std::nullopt;
+}
+
+/* Extractor for presence-only components: "true" when is_present() holds, otherwise nothing */
+template<class Component>
+static auto extract_component_flag(const html_tag *tag) -> std::optional<std::string_view>
+{
+    if (auto comp = tag->find_component<Component>()) {
+        return comp.value()->is_present() ? std::optional<std::string_view>{"true"} : std::nullopt;
+    }
+    return std::nullopt;
+}
+
+/*
+ * Attribute name -> extractor table used by html_tag::find_component_by_name()
+ * and html_tag::get_all_attributes(). Each extractor returns the attribute's
+ * text for a given tag, or std::nullopt when the tag does not yield one.
+ *
+ * NOTE(review): "action" is mapped to the HREF component by html_components_map
+ * but has no entry here, so a lookup by "action" goes to the unknown-component
+ * fallback - confirm that this is intended.
+ */
+static const auto component_extractors = frozen::make_unordered_map<frozen::string, component_extractor_func>(
+    {
+        // Basic components
+        {"name", extract_component_value<html_component_name>},
+        {"href", [](const html_tag *tag) { return tag->find_href(); }},
+        {"src", extract_component_value<html_component_src>},
+        {"class", [](const html_tag *tag) { return tag->find_class(); }},
+        {"id", [](const html_tag *tag) { return tag->find_id(); }},
+        {"style", [](const html_tag *tag) { return tag->find_style(); }},
+        {"alt", [](const html_tag *tag) { return tag->find_alt(); }},
+        {"rel", [](const html_tag *tag) { return tag->find_rel(); }},
+        {"color", extract_component_value<html_component_color>},
+        {"bgcolor", extract_component_value<html_component_bgcolor>},
+
+        // Numeric components (return string representation)
+        {"width", extract_component_string<html_component_width>},
+        {"height", extract_component_string<html_component_height>},
+        {"size", extract_component_string<html_component_size>},
+
+        // Boolean components
+        {"hidden", [](const html_tag *tag) -> std::optional<std::string_view> {
+             return tag->is_hidden() ? std::optional<std::string_view>{"true"} : std::nullopt;
+         }},
+
+        // Typography components
+        {"font-family", extract_component_value<html_component_font_family>},
+        {"font-size", extract_component_string<html_component_font_size>},
+        {"font-weight", extract_component_value<html_component_font_weight>},
+        {"font-style", extract_component_value<html_component_font_style>},
+        {"text-align", extract_component_value<html_component_text_align>},
+        {"text-decoration", extract_component_value<html_component_text_decoration>},
+        {"line-height", extract_component_string<html_component_line_height>},
+
+        // Layout components
+        {"margin", extract_component_value<html_component_margin>},
+        {"margin-top", extract_component_value<html_component_margin_top>},
+        {"margin-bottom", extract_component_value<html_component_margin_bottom>},
+        {"margin-left", extract_component_value<html_component_margin_left>},
+        {"margin-right", extract_component_value<html_component_margin_right>},
+        {"padding", extract_component_value<html_component_padding>},
+        {"padding-top", extract_component_value<html_component_padding_top>},
+        {"padding-bottom", extract_component_value<html_component_padding_bottom>},
+        {"padding-left", extract_component_value<html_component_padding_left>},
+        {"padding-right", extract_component_value<html_component_padding_right>},
+        {"border", extract_component_value<html_component_border>},
+        {"border-color", extract_component_value<html_component_border_color>},
+        {"border-width", extract_component_string<html_component_border_width>},
+        {"border-style", extract_component_value<html_component_border_style>},
+
+        // Display components
+        {"display", extract_component_value<html_component_display>},
+        {"visibility", extract_component_value<html_component_visibility>},
+        {"opacity", extract_component_string<html_component_opacity>},
+
+        // Additional dimensions
+        {"min-width", extract_component_string<html_component_min_width>},
+        {"max-width", extract_component_string<html_component_max_width>},
+        {"min-height", extract_component_string<html_component_min_height>},
+        {"max-height", extract_component_string<html_component_max_height>},
+
+        // Table components
+        {"cellpadding", extract_component_string<html_component_cellpadding>},
+        {"cellspacing", extract_component_string<html_component_cellspacing>},
+        {"valign", extract_component_value<html_component_valign>},
+        {"align", extract_component_value<html_component_align>},
+
+        // Form components
+        {"type", extract_component_value<html_component_type>},
+        {"value", extract_component_value<html_component_value>},
+        {"placeholder", extract_component_value<html_component_placeholder>},
+        {"disabled", extract_component_flag<html_component_disabled>},
+        {"readonly", extract_component_flag<html_component_readonly>},
+        {"checked", extract_component_flag<html_component_checked>},
+        {"selected", extract_component_flag<html_component_selected>},
+
+        // Link & media components
+        {"target", extract_component_value<html_component_target>},
+        {"title", extract_component_value<html_component_title>},
+
+        // Meta components
+        {"charset", extract_component_value<html_component_charset>},
+        {"content", extract_component_value<html_component_content>},
+        {"http-equiv", extract_component_value<html_component_http_equiv>},
+
+        // Accessibility components
+        {"role", extract_component_value<html_component_role>},
+        {"tabindex", extract_component_string<html_component_tabindex>},
+
+        // Background components
+        {"background", extract_component_value<html_component_background>},
+        {"background-image", extract_component_value<html_component_background_image>},
+        {"background-color", extract_component_value<html_component_background_color>},
+        {"background-repeat", extract_component_value<html_component_background_repeat>},
+        {"background-position", extract_component_value<html_component_background_position>},
+
+        // Email tracking components
+        {"data-track", extract_component_value<html_component_data_track>},
+        {"data-id", extract_component_value<html_component_data_id>},
+        {"data-url", extract_component_value<html_component_data_url>},
+    });
+
+/*
+ * Returns the text of the attribute called `attr_name` on this tag.
+ * Names registered in component_extractors are answered by their extractor;
+ * every other name is delegated to find_unknown_component().
+ */
+auto html_tag::find_component_by_name(std::string_view attr_name) const -> std::optional<std::string_view>
+{
+    const auto extractor_it = component_extractors.find(attr_name);
+
+    if (extractor_it == component_extractors.end()) {
+        /* No extractor registered under this name: try the unknown components */
+        return find_unknown_component(attr_name);
+    }
+
+    return extractor_it->second(this);
+}
+
+/*
+ * Collects (name, value) pairs for this tag: first every entry of
+ * component_extractors whose extractor yields a value, in the map's
+ * iteration order, then everything reported by get_unknown_components().
+ */
+auto html_tag::get_all_attributes() const -> std::vector<std::pair<std::string_view, std::string_view>>
+{
+    std::vector<std::pair<std::string_view, std::string_view>> attributes;
+
+    /* Known attributes: keep only those extractors that produce a value */
+    for (const auto &[key, extract]: component_extractors) {
+        auto extracted = extract(this);
+
+        if (!extracted.has_value()) {
+            continue;
+        }
+
+        /* The key is a frozen::string; expose it as a view over the same characters */
+        attributes.emplace_back(std::string_view{key.data(), key.size()}, *extracted);
+    }
+
+    /* Unknown attributes come after the known ones */
+    auto unknown_components = get_unknown_components();
+    for (const auto &[unknown_name, unknown_value]: unknown_components) {
+        attributes.emplace_back(unknown_name, unknown_value);
+    }
+
+    return attributes;
+}
+
enum tag_parser_state {
parse_start = 0,
parse_name,
@@ -234,13 +890,13 @@ enum tag_parser_state {
+/*
+ * State kept while parsing the content of a tag: the current parser state,
+ * a scratch buffer and the name of the attribute being processed.
+ * reset() returns to parse_start and empties both strings.
+ */
struct tag_content_parser_state {
tag_parser_state cur_state = parse_start;
std::string buf;
- std::optional<html_component_type> cur_component;
+ std::string attr_name;// Name of the attribute currently being parsed; empty when none is pending
void reset()
{
cur_state = parse_start;
buf.clear();
- cur_component = std::nullopt;
+ attr_name.clear();// drop the pending attribute name together with the buffer
}
};
@@ -254,56 +910,50 @@ html_parse_tag_content(rspamd_mempool_t *pool,
auto state = parser_env.cur_state;
/*
- * Stores tag component if it doesn't exist, performing copy of the
- * value + decoding of the entities
- * Parser env is set to clear the current html attribute fields (saved_p and
- * cur_component)
+ * Stores tag component creating the appropriate variant type
+ * Parser env is cleared after storing
*/
auto store_component_value = [&]() -> void {
- if (parser_env.cur_component) {
+ if (!parser_env.attr_name.empty()) {
+ std::string_view attr_name_view, value_view;
- if (parser_env.buf.empty()) {
- tag->components.emplace_back(parser_env.cur_component.value(),
- std::string_view{});
+ // Store attribute name in persistent memory
+ if (!parser_env.attr_name.empty()) {
+ auto *name_storage = rspamd_mempool_alloc_buffer(pool, parser_env.attr_name.size());
+ memcpy(name_storage, parser_env.attr_name.data(), parser_env.attr_name.size());
+ attr_name_view = {name_storage, parser_env.attr_name.size()};
}
- else {
- /* We need to copy buf to a persistent storage */
- auto *s = rspamd_mempool_alloc_buffer(pool, parser_env.buf.size());
- if (parser_env.cur_component.value() == html_component_type::RSPAMD_HTML_COMPONENT_ID ||
- parser_env.cur_component.value() == html_component_type::RSPAMD_HTML_COMPONENT_CLASS) {
- /* Lowercase */
- rspamd_str_copy_lc(parser_env.buf.data(), s, parser_env.buf.size());
+ // Store value in persistent memory if not empty
+ if (!parser_env.buf.empty()) {
+ auto *value_storage = rspamd_mempool_alloc_buffer(pool, parser_env.buf.size());
+
+ // Lowercase for id and class attributes
+ if (parser_env.attr_name == "id" || parser_env.attr_name == "class") {
+ rspamd_str_copy_lc(parser_env.buf.data(), value_storage, parser_env.buf.size());
}
else {
- memcpy(s, parser_env.buf.data(), parser_env.buf.size());
+ memcpy(value_storage, parser_env.buf.data(), parser_env.buf.size());
}
- auto sz = rspamd_html_decode_entitles_inplace(s, parser_env.buf.size());
- tag->components.emplace_back(parser_env.cur_component.value(),
- std::string_view{s, sz});
+ auto sz = rspamd_html_decode_entitles_inplace(value_storage, parser_env.buf.size());
+ value_view = {value_storage, sz};
}
+
+ // Create the appropriate component variant
+ auto component = html_component_from_string(attr_name_view, value_view);
+ tag->components.emplace_back(std::move(component));
}
parser_env.buf.clear();
- parser_env.cur_component = std::nullopt;
+ parser_env.attr_name.clear();
};
auto store_component_name = [&]() -> bool {
decode_html_entitles_inplace(parser_env.buf);
- auto known_component_it = html_components_map.find(std::string_view{parser_env.buf});
+ parser_env.attr_name = parser_env.buf;
parser_env.buf.clear();
-
- if (known_component_it != html_components_map.end()) {
- parser_env.cur_component = known_component_it->second;
-
- return true;
- }
- else {
- parser_env.cur_component = std::nullopt;
- }
-
- return false;
+ return true;
};
auto store_value_character = [&](bool lc) -> void {
@@ -471,6 +1121,7 @@ html_parse_tag_content(rspamd_mempool_t *pool,
case parse_start_dquote:
if (*in == '"') {
+ store_component_value();
state = spaces_after_param;
}
else {
@@ -481,6 +1132,7 @@ html_parse_tag_content(rspamd_mempool_t *pool,
case parse_start_squote:
if (*in == '\'') {
+ store_component_value();
state = spaces_after_param;
}
else {
@@ -620,7 +1272,7 @@ html_process_url_tag(rspamd_mempool_t *pool,
struct html_tag *tag,
struct html_content *hc) -> std::optional<struct rspamd_url *>
{
- auto found_href_maybe = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_HREF);
+ auto found_href_maybe = tag->find_href();
if (found_href_maybe) {
/* Check base url */
@@ -816,130 +1468,126 @@ html_process_img_tag(rspamd_mempool_t *pool,
img = rspamd_mempool_alloc0_type(pool, struct html_image);
img->tag = tag;
- for (const auto &param: tag->components) {
+ // Process SRC component (preferred for img tags) or HREF component (fallback)
+ std::optional<std::string_view> href_value;
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_HREF) {
- /* Check base url */
- const auto &href_value = param.value;
+ // Try SRC first (standard for img tags)
+ if (auto src_comp = tag->find_component<html_component_src>()) {
+ href_value = src_comp.value()->value;
+ }
+ // Fallback to HREF (for backward compatibility or non-standard usage)
+ else if (auto href_comp = tag->find_href()) {
+ href_value = href_comp;
+ }
- if (href_value.size() > 0) {
- rspamd_ftok_t fstr;
- fstr.begin = href_value.data();
- fstr.len = href_value.size();
- img->src = rspamd_mempool_ftokdup(pool, &fstr);
+ if (href_value && href_value->size() > 0) {
+ rspamd_ftok_t fstr;
+ fstr.begin = href_value->data();
+ fstr.len = href_value->size();
+ img->src = rspamd_mempool_ftokdup(pool, &fstr);
- if (href_value.size() > sizeof("cid:") - 1 && memcmp(href_value.data(),
- "cid:", sizeof("cid:") - 1) == 0) {
- /* We have an embedded image */
- img->src += sizeof("cid:") - 1;
- img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
- }
- else {
- if (href_value.size() > sizeof("data:") - 1 && memcmp(href_value.data(),
- "data:", sizeof("data:") - 1) == 0) {
- /* We have an embedded image in HTML tag */
- img->flags |=
- (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
- html_process_data_image(pool, img, href_value);
- hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
- }
- else {
- img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
- if (img->src) {
-
- std::string_view cpy{href_value};
- auto maybe_url = html_process_url(pool, cpy);
-
- if (maybe_url) {
- img->url = maybe_url.value();
- struct rspamd_url *existing;
-
- img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
- existing = rspamd_url_set_add_or_return(url_set,
- img->url);
-
- if (existing && existing != img->url) {
- /*
- * We have some other URL that could be
- * found, e.g. from another part. However,
- * we still want to set an image flag on it
- */
- existing->flags |= img->url->flags;
- existing->count++;
- }
- else if (part_urls) {
- /* New url */
- g_ptr_array_add(part_urls, img->url);
- }
- }
- }
- }
- }
- }
+ if (href_value->size() > sizeof("cid:") - 1 && memcmp(href_value->data(),
+ "cid:", sizeof("cid:") - 1) == 0) {
+ /* We have an embedded image */
+ img->src += sizeof("cid:") - 1;
+ img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
}
+ else {
+ if (href_value->size() > sizeof("data:") - 1 && memcmp(href_value->data(),
+ "data:", sizeof("data:") - 1) == 0) {
+ /* We have an embedded image in HTML tag */
+ img->flags |=
+ (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
+ html_process_data_image(pool, img, *href_value);
+ hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
+ }
+ else {
+ img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
+ if (img->src) {
+ std::string_view cpy{*href_value};
+ auto maybe_url = html_process_url(pool, cpy);
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_HEIGHT) {
- unsigned long val;
+ if (maybe_url) {
+ img->url = maybe_url.value();
+ struct rspamd_url *existing;
- rspamd_strtoul(param.value.data(), param.value.size(), &val);
- img->height = val;
- }
+ img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
+ existing = rspamd_url_set_add_or_return(url_set,
+ img->url);
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_WIDTH) {
- unsigned long val;
-
- rspamd_strtoul(param.value.data(), param.value.size(), &val);
- img->width = val;
+ if (existing && existing != img->url) {
+ /*
+ * We have some other URL that could be
+ * found, e.g. from another part. However,
+ * we still want to set an image flag on it
+ */
+ existing->flags |= img->url->flags;
+ existing->count++;
+ }
+ else if (part_urls) {
+ /* New url */
+ g_ptr_array_add(part_urls, img->url);
+ }
+ }
+ }
+ }
}
+ }
- /* TODO: rework to css at some time */
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_STYLE) {
- if (img->height == 0) {
- auto style_st = param.value;
- auto pos = rspamd_substring_search_caseless(style_st.data(),
- style_st.size(),
- "height", sizeof("height") - 1);
- if (pos != -1) {
- auto substr = style_st.substr(pos + sizeof("height") - 1);
+ // Process numeric dimensions using the new helper methods
+ if (auto height = tag->find_height()) {
+ img->height = height.value();
+ }
- for (auto i = 0; i < substr.size(); i++) {
- auto t = substr[i];
- if (g_ascii_isdigit(t)) {
- unsigned long val;
- rspamd_strtoul(substr.data(),
- substr.size(), &val);
- img->height = val;
- break;
- }
- else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
- /* Fallback */
- break;
- }
+ if (auto width = tag->find_width()) {
+ img->width = width.value();
+ }
+
+ // Process style component for dimensions
+ if (auto style_value = tag->find_style()) {
+ if (img->height == 0) {
+ auto pos = rspamd_substring_search_caseless(style_value->data(),
+ style_value->size(),
+ "height", sizeof("height") - 1);
+ if (pos != -1) {
+ auto substr = style_value->substr(pos + sizeof("height") - 1);
+
+ for (auto i = 0; i < substr.size(); i++) {
+ auto t = substr[i];
+ if (g_ascii_isdigit(t)) {
+ unsigned long val;
+ rspamd_strtoul(substr.data(),
+ substr.size(), &val);
+ img->height = val;
+ break;
+ }
+ else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
+ /* Fallback */
+ break;
}
}
}
- if (img->width == 0) {
- auto style_st = param.value;
- auto pos = rspamd_substring_search_caseless(style_st.data(),
- style_st.size(),
- "width", sizeof("width") - 1);
- if (pos != -1) {
- auto substr = style_st.substr(pos + sizeof("width") - 1);
-
- for (auto i = 0; i < substr.size(); i++) {
- auto t = substr[i];
- if (g_ascii_isdigit(t)) {
- unsigned long val;
- rspamd_strtoul(substr.data(),
- substr.size(), &val);
- img->width = val;
- break;
- }
- else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
- /* Fallback */
- break;
- }
+ }
+ if (img->width == 0) {
+ auto pos = rspamd_substring_search_caseless(style_value->data(),
+ style_value->size(),
+ "width", sizeof("width") - 1);
+ if (pos != -1) {
+ auto substr = style_value->substr(pos + sizeof("width") - 1);
+
+ for (auto i = 0; i < substr.size(); i++) {
+ auto t = substr[i];
+ if (g_ascii_isdigit(t)) {
+ unsigned long val;
+ rspamd_strtoul(substr.data(),
+ substr.size(), &val);
+ img->width = val;
+ break;
+ }
+ else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
+ /* Fallback */
+ break;
}
}
}
@@ -968,7 +1616,7 @@ html_process_link_tag(rspamd_mempool_t *pool, struct html_tag *tag,
khash_t(rspamd_url_hash) * url_set,
GPtrArray *part_urls) -> void
{
- auto found_rel_maybe = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_REL);
+ auto found_rel_maybe = tag->find_rel();
if (found_rel_maybe) {
if (found_rel_maybe.value() == "icon") {
@@ -984,24 +1632,23 @@ html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag,
std::optional<css::css_value> maybe_fgcolor, maybe_bgcolor;
bool hidden = false;
- for (const auto &param: tag->components) {
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_COLOR) {
- maybe_fgcolor = css::css_value::maybe_color_from_string(param.value);
- }
-
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR) {
- maybe_bgcolor = css::css_value::maybe_color_from_string(param.value);
- }
+ // Process color components
+ if (auto color_comp = tag->find_component<html_component_color>()) {
+ maybe_fgcolor = css::css_value::maybe_color_from_string(color_comp.value()->value);
+ }
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_STYLE) {
- tag->block = rspamd::css::parse_css_declaration(pool, param.value);
- }
+ if (auto bgcolor_comp = tag->find_component<html_component_bgcolor>()) {
+ maybe_bgcolor = css::css_value::maybe_color_from_string(bgcolor_comp.value()->value);
+ }
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_HIDDEN) {
- hidden = true;
- }
+ // Process style component
+ if (auto style_value = tag->find_style()) {
+ tag->block = rspamd::css::parse_css_declaration(pool, *style_value);
}
+ // Check if hidden
+ hidden = tag->is_hidden();
+
if (!tag->block) {
tag->block = html_block::undefined_html_block_pool(pool);
}
@@ -1284,7 +1931,7 @@ html_append_tag_content(rspamd_mempool_t *pool,
}
else if (tag->id == Tag_IMG) {
/* Process ALT if presented */
- auto maybe_alt = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_ALT);
+ auto maybe_alt = tag->find_alt();
if (maybe_alt) {
if (!hc->parsed.empty() && !g_ascii_isspace(hc->parsed.back())) {
@@ -1384,9 +2031,7 @@ auto html_process_input(struct rspamd_task *task,
overflow_input = true;
}
- auto new_tag = [&](int flags = 0) -> struct html_tag *
- {
-
+ auto new_tag = [&](int flags = 0) -> struct html_tag * {
if (hc->all_tags.size() > rspamd::html::max_tags) {
hc->flags |= RSPAMD_HTML_FLAG_TOO_MANY_TAGS;
@@ -2151,7 +2796,7 @@ auto html_process_input(struct rspamd_task *task,
/* Leftover after content */
switch (state) {
case tags_limit_overflow:
- html_append_parsed(hc, {c, (std::size_t)(end - c)},
+ html_append_parsed(hc, {c, (std::size_t) (end - c)},
false, end - start, hc->parsed);
break;
default:
@@ -2390,4 +3035,4 @@ gsize rspamd_html_get_tags_count(void *html_content)
}
return hc->all_tags.size();
-} \ No newline at end of file
+}