source.dussan.org Git - rspamd.git/commitdiff
html content extraction test improv 1596/head
author: Pragadeesh C <cpragadeesh@gmail.com>
Fri, 14 Apr 2017 13:32:37 +0000 (06:32 -0700)
committer: Pragadeesh C <cpragadeesh@gmail.com>
Fri, 14 Apr 2017 13:32:37 +0000 (06:32 -0700)
test/lua/unit/html.lua

index f1408c25a0fe211642cad4d3a7524c0d70d2dc8b..489947ffbacaa67bb90216031a066646e0d0b27d 100644 (file)
@@ -57,6 +57,64 @@ context("HTML processing", function()
   </body>
 </html>
       ]], 'Hello, world!'},
+      {[[
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>title</title>
+    <link rel="stylesheet" href="style.css">
+    <script src="script.js"></script>
+  </head>
+  <body>
+    <!-- page content -->
+    Hello, world!<br>test</br><br>content</hr>more content<br>
+    <div>
+      content inside div
+    </div>
+  </body>
+</html>
+      ]], 'Hello, world!\r\ntest\r\ncontent\r\nmore content\r\ncontent inside div\r\n'},
+      {[[
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>title</title>
+    <link rel="stylesheet" href="style.css">
+    <script src="script.js"></script>
+  </head>
+  <body>
+    <!-- tabular content -->
+    <table>
+      content
+    </table>
+    <table>
+      <tr>
+        <th>heada</th>
+        <th>headb</th>
+      </tr>
+      <tr>
+        <td>data1</td>
+        <td>data2</td>
+      </tr>
+    </table>
+
+  </body>
+</html>
+      ]], 'content heada headb\r\ndata1 data2\r\n'},
+      {[[
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>title</title>
+    <link rel="stylesheet" href="style.css">
+    <script src="script.js"></script>
+  </head>
+  <body>
+    <!-- escape content -->
+    a&nbsp;b a &gt; b a &lt; b a &amp; b &apos;a &quot;a&quot;
+  </body>
+</html>
+      ]], 'a b a > b a < b a & b \'a "a"'},
     }
 
     for _,c in ipairs(cases) do