source.dussan.org Git - rspamd.git/commitdiff
[Project] More fixes for closed tags
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 1 Jul 2021 16:58:59 +0000 (17:58 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 1 Jul 2021 16:58:59 +0000 (17:58 +0100)
src/libserver/html/html.cxx

index ae73b7413def9e6a18a5c1afa6497f3cb6ed43f8..82c5d213c3a501942843b730348760eea826215e 100644 (file)
@@ -1335,6 +1335,8 @@ html_process_input(rspamd_mempool_t *pool,
                if (cur_tag->flags & FL_CLOSED) {
                        cur_tag->closing.end = cur_tag->content_offset;
                        cur_tag->closing.start = cur_tag->tag_start;
+
+                       cur_tag = parent_tag;
                }
        };
 
@@ -1916,17 +1918,10 @@ TEST_CASE("html text extraction")
 {
 
        const std::vector<std::pair<std::string, std::string>> cases{
-                       /* Tables */
-                       {"<table>\n"
-                        "      <tr>\n"
-                        "        <th>heada</th>\n"
-                        "        <th>headb</th>\n"
-                        "      </tr>\n"
-                        "      <tr>\n"
-                        "        <td>data1</td>\n"
-                        "        <td>data2</td>\n"
-                        "      </tr>\n"
-                        "    </table>", "heada headb\ndata1 data2\n"},
+                       {"  <body>\n"
+                        "    <!-- escape content -->\n"
+                        "    a&nbsp;b a &gt; b a &lt; b a &amp; b &apos;a &quot;a&quot;\n"
+                        "  </body>", R"|(a b a > b a < b a & b 'a "a")|"},
                        /* XML tags */
                        {"<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>\n"
                         " <!DOCTYPE html\n"
@@ -1975,7 +1970,17 @@ TEST_CASE("html text extraction")
                         "  </body>\n"
                         "</html>", "Hello, world! test\ndata<>\nstuff?"},
                        {"<p><!--comment-->test</br></hr><br>", "test\n"},
-
+                       /* Tables */
+                       {"<table>\n"
+                        "      <tr>\n"
+                        "        <th>heada</th>\n"
+                        "        <th>headb</th>\n"
+                        "      </tr>\n"
+                        "      <tr>\n"
+                        "        <td>data1</td>\n"
+                        "        <td>data2</td>\n"
+                        "      </tr>\n"
+                        "    </table>", "heada headb\ndata1 data2\n"},
        };
 
        rspamd_url_init(NULL);