diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-07-07 17:33:42 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-07-07 17:33:42 +0100 |
commit | a7f57a9c6c66471d9b478e99c51d1b792183b38d (patch) | |
tree | 5435bc86e98c7a34458a9df1b87510a2104d5a3f /src/libserver/html/html.cxx | |
parent | 0aaf6ac2c1e8aaf9fe53a64cbb5ec06586fa9ccd (diff) | |
download | rspamd-a7f57a9c6c66471d9b478e99c51d1b792183b38d.tar.gz rspamd-a7f57a9c6c66471d9b478e99c51d1b792183b38d.zip |
[Minor] Moar fixes for spaces stuff in html
Diffstat (limited to 'src/libserver/html/html.cxx')
-rw-r--r-- | src/libserver/html/html.cxx | 28 |
1 files changed, 20 insertions, 8 deletions
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index e0a57387e..a6fcfe36b 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -1011,14 +1011,20 @@ static inline auto html_append_content(struct html_content *hc, std::string_view data, bool transparent) -> auto { auto cur_offset = hc->parsed.size(); - hc->parsed.append(data); - if (cur_offset > 0 && data.size() > 0) { - auto last = hc->parsed.back(); - auto first_appended = data.front(); - if (first_appended == ' ' && !g_ascii_isspace(last)) { - cur_offset++; + if (data.size() > 0) { + /* Handle multiple spaces at the begin */ + + if (cur_offset > 0) { + auto last = hc->parsed.back(); + if (!g_ascii_isspace(last) && g_ascii_isspace(data.front())) { + hc->parsed.append(" "); + data = {data.data() + 1, data.size() - 1}; + cur_offset ++; + } } + + hc->parsed.append(data); } auto nlen = decode_html_entitles_inplace(hc->parsed.data() + cur_offset, @@ -2075,7 +2081,7 @@ TEST_CASE("html text extraction") " </P>\n" " <b>stuff</p>?\n" " </body>\n" - "</html>", "Hello, world! test\ndata<>\nstuff?"}, + "</html>", "Hello, world! test \ndata<>\nstuff?"}, {"<p><!--comment-->test</br></hr><br>", "test\n"}, /* Tables */ {"<table>\n" @@ -2118,9 +2124,15 @@ TEST_CASE("html text extraction") " Sincerely,\n Skype Web\n"}, /* bgcolor propagation */ {"<a style=\"display: inline-block; color: #ffffff; background-color: #00aff0;\">\n" - "<span style=\"color: #00aff0;\">F</span>Rev<span style=\"opacity: 1;\"></span></span>ie<span style=\"opacity: 1;\"></span>\n" + "<span style=\"color: #00aff0;\">F</span>Rev<span style=\"opacity: 1;\"></span></span>ie<span style=\"opacity: 1;\"></span>" "</span>w<span style=\"color: #00aff0;\">F<span style=\"opacity: 1;\">ΜΉ</span></span>", " Review"}, + /* Colors */ + {"goodbye <span style=\"COLOR: rgb(64,64,64)\">cruel</span>" + "<span>world</span>", "goodbye cruelworld"}, + /* Newline before tag -> must be space */ + {"goodbye <span style=\"COLOR: rgb(64,64,64)\">cruel</span>\n" + "<span>world</span>", "goodbye cruel world"}, }; rspamd_url_init(NULL); |