about summary refs log tree commit diff stats
path: root/test/lua
diff options
context:
space:
mode:
author Pragadeesh C <cpragadeesh@gmail.com> 2017-04-14 06:32:37 -0700
committer Pragadeesh C <cpragadeesh@gmail.com> 2017-04-14 06:32:37 -0700
commit a69b112bf749cdec95962ea8835bd62e4cfd5db2 (patch)
tree 8807b158e87e115e7c2691fa14974f5ceec4ef82 /test/lua
parent 30324e7c53dff01bc477b592a24a1f8cc0037406 (diff)
download rspamd-a69b112bf749cdec95962ea8835bd62e4cfd5db2.tar.gz
rspamd-a69b112bf749cdec95962ea8835bd62e4cfd5db2.zip
HTML content extraction test improvements
Diffstat (limited to 'test/lua')
-rw-r--r-- test/lua/unit/html.lua 58
1 files changed, 58 insertions, 0 deletions
diff --git a/test/lua/unit/html.lua b/test/lua/unit/html.lua
index f1408c25a..489947ffb 100644
--- a/test/lua/unit/html.lua
+++ b/test/lua/unit/html.lua
@@ -57,6 +57,64 @@ context("HTML processing", function()
</body>
</html>
]], 'Hello, world!'},
+ {[[
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <title>title</title>
+ <link rel="stylesheet" href="style.css">
+ <script src="script.js"></script>
+ </head>
+ <body>
+ <!-- page content -->
+ Hello, world!<br>test</br><br>content</hr>more content<br>
+ <div>
+ content inside div
+ </div>
+ </body>
+</html>
+ ]], 'Hello, world!\r\ntest\r\ncontent\r\nmore content\r\ncontent inside div\r\n'},
+ {[[
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <title>title</title>
+ <link rel="stylesheet" href="style.css">
+ <script src="script.js"></script>
+ </head>
+ <body>
+ <!-- tabular content -->
+ <table>
+ content
+ </table>
+ <table>
+ <tr>
+ <th>heada</th>
+ <th>headb</th>
+ </tr>
+ <tr>
+ <td>data1</td>
+ <td>data2</td>
+ </tr>
+ </table>
+
+ </body>
+</html>
+ ]], 'content heada headb\r\ndata1 data2\r\n'},
+ {[[
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <title>title</title>
+ <link rel="stylesheet" href="style.css">
+ <script src="script.js"></script>
+ </head>
+ <body>
+ <!-- escape content -->
+ a&nbsp;b a &gt; b a &lt; b a &amp; b &apos;a &quot;a&quot;
+ </body>
+</html>
+ ]], 'a b a > b a < b a & b \'a "a"'},
}
for _,c in ipairs(cases) do