From: Vsevolod Stakhov Date: Thu, 24 Jun 2021 16:38:20 +0000 (+0100) Subject: [Project] Html: More spaces logic fixes X-Git-Tag: 3.0~240 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=f8351d7aa84a526dfb8ac92fec9894f51e2a6359;p=rspamd.git [Project] Html: More spaces logic fixes --- diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index 45094e7f8..894b1ee45 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -1849,6 +1849,26 @@ TEST_CASE("html text extraction") "", "\n\n\ntest\n"}, {"
file " "sharing
", "fish\n"}, + /* FIXME: broken until rework */ + //{"
file " + // "sharing
foo", "fish\nfoo"}, + {"

test", "test"}, + {"\n" + "\n" + " \n" + " \n" + " title\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " Hello, world! test\n" + "

data<>\n" + "

\n" + " stuff

?\n" + " \n" + "", "Hello, world! test\ndata<> \nstuff?"} }; rspamd_url_init(NULL); diff --git a/src/libserver/html/html_entities.cxx b/src/libserver/html/html_entities.cxx index 573872f43..4cbdf02bf 100644 --- a/src/libserver/html/html_entities.cxx +++ b/src/libserver/html/html_entities.cxx @@ -2552,9 +2552,16 @@ decode_html_entitles_inplace(char *s, std::size_t len, bool norm_spaces) } if (norm_spaces) { + bool seen_spaces = false; + while (t > s && g_ascii_isspace(*(t - 1))) { + seen_spaces = true; t --; } + + if (seen_spaces) { + *t++ = ' '; + } } return (t - s);