}
}
}
-
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_ALT) {
- if (!hc->parsed.empty() && !g_ascii_isspace (hc->parsed.back())) {
- /* Add a space */
- hc->parsed += ' ';
- }
- hc->parsed.append(param.value);
-
- if (!g_ascii_isspace (hc->parsed.back())) {
- /* Add a space */
- hc->parsed += ' ';
- }
- }
}
if (img->embedded_image) {
}
static auto
-html_append_tag_content(const gchar *start, gsize len,
+html_process_displayed_href_tag (rspamd_mempool_t *pool,
+ struct html_content *hc,
+ std::string_view data,
+ const struct html_tag *cur_tag,
+ GList **exceptions,
+ khash_t (rspamd_url_hash) *url_set,
+ goffset dest_offset) -> void
+{
+
+ if (std::holds_alternative<rspamd_url *>(cur_tag->extra)) {
+ auto *url = std::get<rspamd_url *>(cur_tag->extra);
+
+ html_check_displayed_url(pool,
+ exceptions, url_set,
+ data,
+ dest_offset,
+ url);
+ }
+}
+
+static auto
+html_append_tag_content(rspamd_mempool_t *pool,
+ const gchar *start, gsize len,
struct html_content *hc,
const html_tag *tag,
- std::vector<const html_tag *> &enclosed_tags) -> goffset
+ std::vector<const html_tag *> &enclosed_tags,
+ GList **exceptions,
+ khash_t (rspamd_url_hash) *url_set) -> goffset
{
auto is_visible = true, is_block = false;
- goffset next_tag_offset = tag->content_length + tag->content_offset;
+ goffset next_tag_offset = tag->content_length + tag->content_offset,
+ initial_dest_offset = hc->parsed.size();
if (tag->id == Tag_BR || tag->id == Tag_HR) {
if (!hc->parsed.empty()) {
}
if (!tag->block) {
- is_visible = false;
+ is_visible = true;
}
else if (!tag->block->is_visible()) {
is_visible = false;
next_enclosed = enclosed_tags.back();
enclosed_start = next_enclosed->tag_start;
enclosed_end = next_enclosed->content_length +
- next_enclosed->content_offset;
+ next_enclosed->content_offset;
if (enclosed_end > next_tag_offset) {
next_tag_offset = enclosed_end;
if (next_enclosed) {
/* Recursively print enclosed tags */
std::reverse(std::begin(nested_stack), std::end(nested_stack));
- cur_offset = html_append_tag_content(start, len, hc, next_enclosed, nested_stack);
+ cur_offset = html_append_tag_content(pool, start, len, hc, next_enclosed,
+ nested_stack, exceptions, url_set);
initial_part_len = next_tag_offset - cur_offset;
if (is_visible && initial_part_len > 0) {
}
}
+ if (is_visible && !(tag->flags & FL_CLOSING)) {
+ if (tag->id == Tag_A) {
+ auto written_len = hc->parsed.size() - initial_dest_offset;
+ html_process_displayed_href_tag(pool, hc,
+ {hc->parsed.data() + initial_dest_offset, written_len},
+ tag, exceptions,
+ url_set, initial_dest_offset);
+ }
+ else if (tag->id == Tag_IMG) {
+ /* Process ALT if presented */
+ auto maybe_alt = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_ALT);
+
+ if (maybe_alt) {
+ if (!hc->parsed.empty() && !g_ascii_isspace (hc->parsed.back())) {
+ /* Add a space */
+ hc->parsed += ' ';
+ }
+ hc->parsed.append(maybe_alt.value());
+
+ if (!g_ascii_isspace (hc->parsed.back())) {
+ /* Add a space */
+ hc->parsed += ' ';
+ }
+ }
+ }
+ }
+
return next_tag_offset;
}
static auto
-html_append_tags_content(const gchar *start, gsize len,
- struct html_content *hc) -> void
+html_append_tags_content(rspamd_mempool_t *pool,
+ const gchar *start, gsize len,
+ struct html_content *hc,
+ GList **exceptions,
+ khash_t (rspamd_url_hash) *url_set) -> void
{
auto cur_offset = 0;
std::vector<const html_tag *> enclosed_tags_stack;
}
std::reverse(enclosed_tags_stack.begin(), enclosed_tags_stack.end());
- cur_offset = html_append_tag_content(start, len, hc, tag.get(),
- enclosed_tags_stack);
+ cur_offset = html_append_tag_content(pool, start, len, hc, tag.get(),
+ enclosed_tags_stack, exceptions, url_set);
i = j;
}
}
href_offset = hc->parsed.size();
}
}
-
- if (cur_tag->id == Tag_A) {
- if (tags_stack.size() >= 2) {
- const auto *prev_tag = tags_stack.back()->parent;
-
- if (prev_tag && prev_tag->id == Tag_A &&
- !(prev_tag->flags & (FL_CLOSING)) &&
- std::holds_alternative<rspamd_url *>(prev_tag->extra)) {
- auto *prev_url = std::get<rspamd_url *>(prev_tag->extra);
-
- std::string_view disp_part{
- hc->parsed.data() + href_offset,
- hc->parsed.size() - href_offset};
- html_check_displayed_url (pool,
- exceptions, url_set,
- disp_part,
- href_offset,
- prev_url);
- }
- }
-
- if (cur_tag->flags & (FL_CLOSING)) {
-
- /* Insert exception */
- if (url != NULL && hc->parsed.size() > href_offset) {
- std::string_view disp_part{
- hc->parsed.data() + href_offset,
- hc->parsed.size() - href_offset};
- html_check_displayed_url (pool,
- exceptions, url_set,
- disp_part,
- href_offset,
- url);
-
- }
-
- href_offset = -1;
- url = NULL;
- }
- }
}
else if (cur_tag->id == Tag_BASE && !(cur_tag->flags & (FL_CLOSING))) {
/*
std::sort(hc->all_tags.begin(), hc->all_tags.end(), [](const auto &pt1, const auto &pt2) -> auto {
return pt1->tag_start < pt2->tag_start;
});
- html_append_tags_content(start, end - start, hc);
+ html_append_tags_content(pool, start, end - start, hc, exceptions, url_set);
}
/* Leftover */
const std::vector<std::pair<std::string, std::string>> cases{
{"test", "test"},
- {"test ", "test "},
+ {"test ", "test"},
{"test foo, bar", "test foo, bar"},
{"<p>text</p>", "text\n"},
{"olo<p>text</p>lolo", "olo\ntext\nlolo"},
{"<b>foo<i>bar</b>baz</i>", "foobarbaz"},
{"foo<br>baz", "foo\nbaz"},
{"<div>foo</div><div>bar</div>", "foo\nbar\n"},
+ {"<a href=https://example.com>test</a>", "test"},
+ {"<img alt=test>", "test"},
};
rspamd_url_init(NULL);