}
}
-static void
-html_process_color(std::string_view input, struct html_color *cl)
-{
- const gchar *p = input.data(), *end = input.data() + input.size();
- char hexbuf[7];
-
- memset(cl, 0, sizeof(*cl));
-
- if (*p == '#') {
- /* HEX color */
- p++;
- rspamd_strlcpy(hexbuf, p, MIN ((gint) sizeof(hexbuf), end - p + 1));
- cl->d.val = strtoul(hexbuf, NULL, 16);
- cl->d.comp.alpha = 255;
- cl->valid = TRUE;
- }
- else if (input.size() > 4 && rspamd_lc_cmp(p, "rgb", 3) == 0) {
- /* We have something like rgba(x,x,x,x) or rgb(x,x,x) */
- enum {
- obrace,
- num1,
- num2,
- num3,
- num4,
- skip_spaces
- } state = skip_spaces, next_state = obrace;
- gulong r = 0, g = 0, b = 0, opacity = 255;
- const gchar *c;
- gboolean valid = FALSE;
-
- p += 3;
-
- if (*p == 'a') {
- p++;
- }
-
- c = p;
-
- while (p < end) {
- switch (state) {
- case obrace:
- if (*p == '(') {
- p++;
- state = skip_spaces;
- next_state = num1;
- }
- else if (g_ascii_isspace (*p)) {
- state = skip_spaces;
- next_state = obrace;
- }
- else {
- goto stop;
- }
- break;
- case num1:
- if (*p == ',') {
- if (!rspamd_strtoul(c, p - c, &r)) {
- goto stop;
- }
-
- p++;
- state = skip_spaces;
- next_state = num2;
- }
- else if (!g_ascii_isdigit (*p)) {
- goto stop;
- }
- else {
- p++;
- }
- break;
- case num2:
- if (*p == ',') {
- if (!rspamd_strtoul(c, p - c, &g)) {
- goto stop;
- }
-
- p++;
- state = skip_spaces;
- next_state = num3;
- }
- else if (!g_ascii_isdigit (*p)) {
- goto stop;
- }
- else {
- p++;
- }
- break;
- case num3:
- if (*p == ',') {
- if (!rspamd_strtoul(c, p - c, &b)) {
- goto stop;
- }
-
- valid = TRUE;
- p++;
- state = skip_spaces;
- next_state = num4;
- }
- else if (*p == ')') {
- if (!rspamd_strtoul(c, p - c, &b)) {
- goto stop;
- }
-
- valid = TRUE;
- goto stop;
- }
- else if (!g_ascii_isdigit (*p)) {
- goto stop;
- }
- else {
- p++;
- }
- break;
- case num4:
- if (*p == ',') {
- if (!rspamd_strtoul(c, p - c, &opacity)) {
- goto stop;
- }
-
- valid = TRUE;
- goto stop;
- }
- else if (*p == ')') {
- if (!rspamd_strtoul(c, p - c, &opacity)) {
- goto stop;
- }
-
- valid = TRUE;
- goto stop;
- }
- else if (!g_ascii_isdigit (*p)) {
- goto stop;
- }
- else {
- p++;
- }
- break;
- case skip_spaces:
- if (!g_ascii_isspace (*p)) {
- c = p;
- state = next_state;
- }
- else {
- p++;
- }
- break;
- }
- }
-
-stop:
-
- if (valid) {
- cl->d.comp.r = r;
- cl->d.comp.g = g;
- cl->d.comp.b = b;
- cl->d.comp.alpha = opacity;
- cl->valid = TRUE;
- }
- }
- else {
- auto maybe_color_value =
- rspamd::css::css_value::maybe_color_from_string(input);
-
- if (maybe_color_value.has_value()) {
- auto color = maybe_color_value->to_color().value();
- cl->d.val = color.to_number();
- cl->d.comp.alpha = 255; /* Non transparent */
- }
- }
-}
-
-/*
- * Target is used for in and out if this function returns TRUE
- */
-static auto
-html_process_css_size(const gchar *suffix, gsize len,
- double &tgt) -> bool
-{
- gdouble sz = tgt;
- gboolean ret = FALSE;
-
- if (len >= 2) {
- if (memcmp(suffix, "px", 2) == 0) {
- sz = (guint) sz; /* Round to number */
- ret = TRUE;
- }
- else if (memcmp(suffix, "em", 2) == 0) {
- /* EM is 16 px, so multiply and round */
- sz = (guint) (sz * 16.0);
- ret = TRUE;
- }
- else if (len >= 3 && memcmp(suffix, "rem", 3) == 0) {
- /* equal to EM in our case */
- sz = (guint) (sz * 16.0);
- ret = TRUE;
- }
- else if (memcmp(suffix, "ex", 2) == 0) {
- /*
- * Represents the x-height of the element's font.
- * On fonts with the "x" letter, this is generally the height
- * of lowercase letters in the font; 1ex = 0.5em in many fonts.
- */
- sz = (guint) (sz * 8.0);
- ret = TRUE;
- }
- else if (memcmp(suffix, "vw", 2) == 0) {
- /*
- * Vewport width in percentages:
- * we assume 1% of viewport width as 8px
- */
- sz = (guint) (sz * 8.0);
- ret = TRUE;
- }
- else if (memcmp(suffix, "vh", 2) == 0) {
- /*
- * Vewport height in percentages
- * we assume 1% of viewport width as 6px
- */
- sz = (guint) (sz * 6.0);
- ret = TRUE;
- }
- else if (len >= 4 && memcmp(suffix, "vmax", 4) == 0) {
- /*
- * Vewport width in percentages
- * we assume 1% of viewport width as 6px
- */
- sz = (guint) (sz * 8.0);
- ret = TRUE;
- }
- else if (len >= 4 && memcmp(suffix, "vmin", 4) == 0) {
- /*
- * Vewport height in percentages
- * we assume 1% of viewport width as 6px
- */
- sz = (guint) (sz * 6.0);
- ret = TRUE;
- }
- else if (memcmp(suffix, "pt", 2) == 0) {
- sz = (guint) (sz * 96.0 / 72.0); /* One point. 1pt = 1/72nd of 1in */
- ret = TRUE;
- }
- else if (memcmp(suffix, "cm", 2) == 0) {
- sz = (guint) (sz * 96.0 / 2.54); /* 96px/2.54 */
- ret = TRUE;
- }
- else if (memcmp(suffix, "mm", 2) == 0) {
- sz = (guint) (sz * 9.6 / 2.54); /* 9.6px/2.54 */
- ret = TRUE;
- }
- else if (memcmp(suffix, "in", 2) == 0) {
- sz = (guint) (sz * 96.0); /* 96px */
- ret = TRUE;
- }
- else if (memcmp(suffix, "pc", 2) == 0) {
- sz = (guint) (sz * 96.0 / 6.0); /* 1pc = 12pt = 1/6th of 1in. */
- ret = TRUE;
- }
- }
- else if (suffix[0] == '%') {
- /* Percentages from 16 px */
- sz = (guint) (sz / 100.0 * 16.0);
- ret = TRUE;
- }
-
- if (ret) {
- tgt = sz;
- }
-
- return ret;
-}
-
-static auto
-html_process_font_size(const gchar *line, guint len, guint &fs,
- gboolean is_css) -> void
-{
- const gchar *p = line, *end = line + len;
- gchar *err = NULL, numbuf[64];
- gdouble sz = 0;
- gboolean failsafe = FALSE;
-
- while (p < end && g_ascii_isspace (*p)) {
- p++;
- len--;
- }
-
- if (g_ascii_isdigit (*p)) {
- rspamd_strlcpy(numbuf, p, MIN (sizeof(numbuf), len + 1));
- sz = strtod(numbuf, &err);
-
- /* Now check leftover */
- if (sz < 0) {
- sz = 0;
- }
- }
- else {
- /* Ignore the rest */
- failsafe = TRUE;
- sz = is_css ? 16 : 1;
- /* TODO: add textual fonts descriptions */
- }
-
- if (err && *err != '\0') {
- const gchar *e = err;
- gsize slen;
-
- /* Skip spaces */
- while (*e && g_ascii_isspace (*e)) {
- e++;
- }
-
- /* Lowercase */
- slen = strlen(e);
- rspamd_str_lc((gchar *) e, slen);
-
- if (!html_process_css_size(e, slen, sz)) {
- failsafe = TRUE;
- }
- }
- else {
- /* Failsafe naked number */
- failsafe = TRUE;
- }
-
- if (failsafe) {
- if (is_css) {
- /*
- * In css mode we usually ignore sizes, but let's treat
- * small sizes specially
- */
- if (sz < 1) {
- sz = 0;
- }
- else {
- sz = 16; /* Ignore */
- }
- }
- else {
- /* In non-css mode we have to check legacy size */
- sz = sz >= 1 ? sz * 16 : 16;
- }
- }
-
- if (sz > 32) {
- sz = 32;
- }
-
- fs = sz;
-}
-
-static void
-html_process_style(rspamd_mempool_t *pool, struct html_block *bl,
- struct html_content *hc,
- std::string_view style)
-{
- const gchar *p, *c, *end, *key = NULL;
- enum {
- read_key,
- read_colon,
- read_value,
- skip_spaces,
- } state = skip_spaces, next_state = read_key;
- guint klen = 0;
- gdouble opacity = 1.0;
-
- p = style.data();
- c = p;
- end = p + style.size();
-
- while (p <= end) {
- switch (state) {
- case read_key:
- if (p == end || *p == ':') {
- key = c;
- klen = p - c;
- state = skip_spaces;
- next_state = read_value;
- }
- else if (g_ascii_isspace (*p)) {
- key = c;
- klen = p - c;
- state = skip_spaces;
- next_state = read_colon;
- }
-
- p++;
- break;
-
- case read_colon:
- if (p == end || *p == ':') {
- state = skip_spaces;
- next_state = read_value;
- }
-
- p++;
- break;
-
- case read_value:
- if (p == end || *p == ';') {
- if (key && klen && p - c > 0) {
- if ((klen == 5 && g_ascii_strncasecmp(key, "color", 5) == 0)
- || (klen == 10 && g_ascii_strncasecmp(key, "font-color", 10) == 0)) {
-
- html_process_color({c, (std::size_t)(p - c)}, &bl->font_color);
- msg_debug_html ("got color: %xd", bl->font_color.d.val);
- }
- else if ((klen == 16 && g_ascii_strncasecmp(key,
- "background-color", 16) == 0) ||
- (klen == 10 && g_ascii_strncasecmp(key,
- "background", 10) == 0)) {
-
- html_process_color({c, (std::size_t)(p - c)}, &bl->background_color);
- msg_debug_html ("got bgcolor: %xd", bl->background_color.d.val);
- }
- else if (klen == 7 && g_ascii_strncasecmp(key, "display", 7) == 0) {
- if (p - c >= 4 && rspamd_substring_search_caseless(c, p - c,
- "none", 4) != -1) {
- bl->visible = FALSE;
- msg_debug_html ("tag is not visible");
- }
- }
- else if (klen == 9 &&
- g_ascii_strncasecmp(key, "font-size", 9) == 0) {
- html_process_font_size(c, p - c,
- bl->font_size, TRUE);
- msg_debug_html ("got font size: %ud", bl->font_size);
- }
- else if (klen == 7 &&
- g_ascii_strncasecmp(key, "opacity", 7) == 0) {
- gchar numbuf[64];
-
- rspamd_strlcpy(numbuf, c,
- MIN (sizeof(numbuf), p - c + 1));
- opacity = strtod(numbuf, NULL);
-
- if (opacity > 1) {
- opacity = 1;
- }
- else if (opacity < 0) {
- opacity = 0;
- }
-
- bl->font_color.d.comp.alpha = (guint8) (opacity * 255.0);
- }
- else if (klen == 10 &&
- g_ascii_strncasecmp(key, "visibility", 10) == 0) {
- if (p - c >= 6 && rspamd_substring_search_caseless(c,
- p - c,
- "hidden", 6) != -1) {
- bl->visible = FALSE;
- msg_debug_html ("tag is not visible");
- }
- }
- }
-
- key = NULL;
- klen = 0;
- state = skip_spaces;
- next_state = read_key;
- }
-
- p++;
- break;
-
- case skip_spaces:
- if (p < end && !g_ascii_isspace (*p)) {
- c = p;
- state = next_state;
- }
- else {
- p++;
- }
-
- break;
- }
- }
-}
-
static auto
html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag,
struct html_content *hc) -> void
{
- auto *bl = rspamd_mempool_alloc0_type (pool, struct html_block);
- bl->tag = tag;
- bl->visible = TRUE;
- bl->font_size = (guint) -1;
- bl->font_color.d.comp.alpha = 255;
+ std::optional<css::css_value> maybe_fgcolor, maybe_bgcolor; /* Parsed color/bgcolor components; applied to the block after the loop */
for (const auto &param : tag->parameters) {
if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_COLOR) {
- html_process_color(param.value, &bl->font_color);
- msg_debug_html ("tag %*s; got color: %xd",
- (int) tag->name.size(), tag->name.data(),
- bl->font_color.d.val);
+ maybe_fgcolor = css::css_value::maybe_color_from_string(param.value);
}
if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR) {
- html_process_color(param.value, &bl->background_color);
- msg_debug_html ("tag %*s; got bgcolor: %xd",
- (int) tag->name.size(), tag->name.data(),
- bl->background_color.d.val);
- if (tag->id == Tag_BODY) {
- /* Set global background color */
- memcpy(&hc->bgcolor, &bl->background_color,
- sizeof(hc->bgcolor));
- }
+ maybe_bgcolor = css::css_value::maybe_color_from_string(param.value);
}
if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_STYLE) {
- html_process_style(pool, bl, hc, param.value);
- msg_debug_html ("tag: %*s; got style: %*s",
- (int) tag->name.size(), tag->name.data(),
- (int) bl->style.len, bl->style.begin);
- }
-
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_CLASS) {
- rspamd_ftok_t fstr;
- fstr.begin = param.value.data();
- fstr.len = param.value.size();
- bl->html_class = rspamd_mempool_ftokdup (pool, &fstr);
- msg_debug_html ("tag: %*s; got class: %s",
- (int) tag->name.size(), tag->name.data(), bl->html_class);
+ tag->block = rspamd::css::parse_css_declaration(pool, param.value); /* Block built from the style component value */
}
}
- hc->blocks.push_back(bl);
- tag->block = bl;
-}
-
-static auto
-html_propagate_style(struct html_content *hc,
- struct html_tag *tag,
- struct html_block *bl,
- std::vector<struct html_block *> &blocks) -> void
-{
- gboolean push_block = FALSE;
-
- if (blocks.empty()) {
- /* No blocks to propagate */
- return;
- }
- /* Propagate from the parent if needed */
- auto *bl_parent = blocks.back();
-
- if (!bl->background_color.valid) {
- /* Try to propagate background color from parent nodes */
- if (bl_parent->background_color.valid) {
- memcpy(&bl->background_color, &bl_parent->background_color,
- sizeof(bl->background_color));
- }
- }
- else {
- push_block = TRUE;
+ if (!tag->block) {
+ tag->block = html_block::undefined_html_block_pool(pool); /* Ensure a block exists before the colors are applied */
}
- if (!bl->font_color.valid) {
- /* Try to propagate background color from parent nodes */
- if (bl_parent->font_color.valid) {
- memcpy(&bl->font_color, &bl_parent->font_color,
- sizeof(bl->font_color));
- }
- }
- else {
- push_block = TRUE;
- }
-
- /* Propagate font size */
- if (bl->font_size == (guint) -1) {
- if (bl_parent->font_size != (guint) -1) {
- bl->font_size = bl_parent->font_size;
- }
- }
- else {
- push_block = TRUE;
+ if (maybe_fgcolor) {
+ tag->block->set_fgcolor(maybe_fgcolor->to_color().value());
}
- /* Set bgcolor to the html bgcolor and font color to black as a last resort */
- if (!bl->font_color.valid) {
- /* Don't touch opacity as it can be set separately */
- bl->font_color.d.comp.r = 0;
- bl->font_color.d.comp.g = 0;
- bl->font_color.d.comp.b = 0;
- bl->font_color.valid = TRUE;
- }
- else {
- push_block = TRUE;
- }
-
- if (!bl->background_color.valid) {
- memcpy(&bl->background_color, &hc->bgcolor, sizeof(hc->bgcolor));
- }
- else {
- push_block = TRUE;
- }
-
- if (bl->font_size == (guint) -1) {
- bl->font_size = 16; /* Default for browsers */
- }
- else {
- push_block = TRUE;
- }
-
- if (push_block && !(tag->flags & FL_CLOSED)) {
- blocks.push_back(bl);
+ if (maybe_bgcolor) {
+ tag->block->set_bgcolor(maybe_bgcolor->to_color().value()); /* Must read maybe_bgcolor: maybe_fgcolor may be empty on this path */
}
}
html_process_link_tag(pool, cur_tag, hc, url_set,
part_urls);
}
- else if (cur_tag->flags & FL_BLOCK) {
+
+ if (cur_tag->flags & FL_BLOCK) {
struct html_block *bl;
if (cur_tag->flags & FL_CLOSING) {
}
else {
html_process_block_tag(pool, cur_tag, hc);
- bl = cur_tag->block;
-
- if (bl) {
- html_propagate_style(hc, cur_tag,
- bl, blocks_stack);
-
- /* Check visibility */
- if (bl->font_size < 3 ||
- bl->font_color.d.comp.alpha < 10) {
-
- bl->visible = FALSE;
- msg_debug_html ("tag is not visible: font size: "
- "%d, alpha: %d",
- (int)bl->font_size,
- (int)bl->font_color.d.comp.alpha);
- }
-
- if (!bl->visible) {
- state = content_ignore;
- }
- }
}
}
}