source.dussan.org Git - rspamd.git/commitdiff
[Minor] Some more fixes to spaces normalisation
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 23 Jun 2021 11:19:22 +0000 (12:19 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 23 Jun 2021 11:19:22 +0000 (12:19 +0100)
src/libserver/html/html.cxx
src/libserver/html/html_entities.cxx

index 20a38ee09bee5836d9de6a35aeb09c54397da420..694a172b2ac4bce3ddedd851b364c5754623b77f 100644 (file)
@@ -1824,7 +1824,7 @@ TEST_CASE("html text extraction")
                         "    <div class=\"moz-forward-container\"><br>\n"
                         "      <br>\n"
                         "      test</div>"
-                        "</body>", "\ntest\n"},
+                        "</body>", "\n\n\ntest\n"},
        };
 
        rspamd_url_init(NULL);
index d024c12e1d0bdf5866225e975dcd35890618e5b2..b0e682807625a3c14112a4136720dfeda2a384be 100644 (file)
@@ -2550,13 +2550,9 @@ decode_html_entitles_inplace(char *s, std::size_t len, bool norm_spaces)
                }
        }
 
-       if (norm_spaces && g_ascii_isspace(*t)) {
-               do {
+       if (norm_spaces) {
+               while (t > s && g_ascii_isspace(*(t - 1))) {
                        t --;
-               } while (t > s && g_ascii_isspace(*t));
-
-               if (!g_ascii_isspace(*t)) {
-                       t++; /* Preserve last space character */
                }
        }
 
@@ -2573,13 +2569,13 @@ TEST_SUITE("html") {
                                {"abc     def", "abc def"},
                                {"abc\ndef", "abc def"},
                                {"abc\n \tdef", "abc def"},
-                               {"    abc def   ", " abc def "},
+                               {"    abc def   ", "abc def"},
                                {"FOO&gt;BAR", "FOO>BAR"},
                                {"FOO&gtBAR", "FOO>BAR"},
                                {"FOO&gt BAR", "FOO>BAR"},
                                {"FOO&gt;;;BAR", "FOO>;;BAR"},
-                               {"I'm &notit; ", "I'm ¬it; "},
-                               {"I'm &notin; ", "I'm ∉ "},
+                               {"I'm &notit; ", "I'm ¬it;"},
+                               {"I'm &notin; ", "I'm ∉"},
                                {"FOO& BAR", "FOO& BAR"},
                                {"FOO&&&&gt;BAR", "FOO&&&>BAR"},
                                {"FOO&#41;BAR", "FOO)BAR"},