initial_dest_offset = hc->parsed.size();
if (tag->id == Tag_BR || tag->id == Tag_HR) {
- if (!hc->parsed.empty()) {
- hc->parsed.append("\n");
- }
+ hc->parsed.append("\n");
return tag->content_offset;
}
cur_offset = html_append_tag_content(pool, start, len, hc, next_enclosed,
nested_stack, exceptions, url_set);
- initial_part_len = next_tag_offset - cur_offset;
- if (is_visible && initial_part_len > 0) {
- html_append_content(hc, {start + cur_offset,
- std::size_t(initial_part_len)});
+ if (enclosed_tags.empty()) {
+ initial_part_len = next_tag_offset - cur_offset;
+ if (is_visible && initial_part_len > 0) {
+ html_append_content(hc, {start + cur_offset,
+ std::size_t(initial_part_len)});
+ }
}
}
} while (!enclosed_tags.empty());
if (is_block && is_visible) {
- if (!hc->parsed.empty()) {
+ if (!hc->parsed.empty() && hc->parsed.back() != '\n') {
hc->parsed.append("\n");
}
}
{"<div>foo</div><div>bar</div>", "foo\nbar\n"},
{"<a href=https://example.com>test</a>", "test"},
{"<img alt=test>", "test"},
+ {"<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"></head>"
+ " <body>\n"
+ " <p><br>\n"
+ " </p>\n"
+ " <div class=\"moz-forward-container\"><br>\n"
+ " <br>\n"
+ " test</div>"
+ "</body>", "\ntest\n"},
};
rspamd_url_init(NULL);
normal_content,
ampersand,
skip_multi_spaces,
+ skip_start_spaces,
} state = parser_state::normal_content;
end = s + len;
return false;
};
+ /* Check len first: with an empty buffer there is no byte at *h to inspect */
+ if (norm_spaces && len > 0 && g_ascii_isspace(*h)) {
+ /* Input begins with whitespace: start in the state that advances past it */
+ state = parser_state::skip_start_spaces;
+ }
+
while (h - s < len && t <= h) {
switch (state) {
case parser_state::normal_content:
state = parser_state::normal_content;
}
break;
+ case parser_state::skip_start_spaces:
+ if (g_ascii_isspace(*h)) {
+ h ++;
+ }
+ else {
+ state = parser_state::normal_content;
+ }
+ break;
}
}
}
}
+ if (norm_spaces) {
+ /*
+ * The returned length is t - s, so t is one past the last output byte:
+ * inspect t[-1] rather than *t, and never step below s.
+ */
+ bool seen_spaces = false;
+
+ while (t > s && g_ascii_isspace(*(t - 1))) {
+ seen_spaces = true;
+ t --;
+ }
+
+ if (seen_spaces) {
+ t ++; /* Preserve last space character */
+ }
+ }
+
return (t - s);
}