From c8c91b2f1ee7ee321c68219b9eb515359a5ae962 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 23 Jun 2021 12:19:22 +0100 Subject: [PATCH] [Minor] Some more fixes to spaces normalisation --- src/libserver/html/html.cxx | 2 +- src/libserver/html/html_entities.cxx | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index 20a38ee09..694a172b2 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -1824,7 +1824,7 @@ TEST_CASE("html text extraction") "

\n" "
\n" " test
" - "", "\ntest\n"}, + "", "\n\n\ntest\n"}, }; rspamd_url_init(NULL); diff --git a/src/libserver/html/html_entities.cxx b/src/libserver/html/html_entities.cxx index d024c12e1..b0e682807 100644 --- a/src/libserver/html/html_entities.cxx +++ b/src/libserver/html/html_entities.cxx @@ -2550,13 +2550,9 @@ decode_html_entitles_inplace(char *s, std::size_t len, bool norm_spaces) } } - if (norm_spaces && g_ascii_isspace(*t)) { - do { + if (norm_spaces) { + while (t > s && g_ascii_isspace(*(t - 1))) { t --; - } while (t > s && g_ascii_isspace(*t)); - - if (!g_ascii_isspace(*t)) { - t++; /* Preserve last space character */ } } @@ -2573,13 +2569,13 @@ TEST_SUITE("html") { {"abc def", "abc def"}, {"abc\ndef", "abc def"}, {"abc\n \tdef", "abc def"}, - {" abc def ", " abc def "}, + {" abc def ", "abc def"}, {"FOO>BAR", "FOO>BAR"}, {"FOO>BAR", "FOO>BAR"}, {"FOO> BAR", "FOO>BAR"}, {"FOO>;;BAR", "FOO>;;BAR"}, - {"I'm ¬it; ", "I'm ¬it; "}, - {"I'm ∉ ", "I'm ∉ "}, + {"I'm ¬it; ", "I'm ¬it;"}, + {"I'm ∉ ", "I'm ∉"}, {"FOO& BAR", "FOO& BAR"}, {"FOO&&&>BAR", "FOO&&&>BAR"}, {"FOO)BAR", "FOO)BAR"}, -- 2.39.5