summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-04-17 18:11:37 +0100
committer: GitHub <noreply@github.com> 2017-04-17 18:11:37 +0100
commit: af76c47e4a31b499fe443e59727e3f909cd763f9 (patch)
tree: 7acf71350bf5bdc87623af83295665732ef760a5
parent: 43a67aa9d3b68993de487d04d00c328ef051345b (diff)
parent: a69b112bf749cdec95962ea8835bd62e4cfd5db2 (diff)
download: rspamd-af76c47e4a31b499fe443e59727e3f909cd763f9.tar.gz
download: rspamd-af76c47e4a31b499fe443e59727e3f909cd763f9.zip
Merge pull request #1596 from cpragadeesh/html_test_improv
HTML content extraction tests improvement
-rw-r--r-- test/lua/unit/html.lua | 58
1 file changed, 58 insertions, 0 deletions
diff --git a/test/lua/unit/html.lua b/test/lua/unit/html.lua
index f1408c25a..489947ffb 100644
--- a/test/lua/unit/html.lua
+++ b/test/lua/unit/html.lua
@@ -57,6 +57,64 @@ context("HTML processing", function()
</body>
</html>
]], 'Hello, world!'},
+ {[[
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <title>title</title>
+ <link rel="stylesheet" href="style.css">
+ <script src="script.js"></script>
+ </head>
+ <body>
+ <!-- page content -->
+ Hello, world!<br>test</br><br>content</hr>more content<br>
+ <div>
+ content inside div
+ </div>
+ </body>
+</html>
+ ]], 'Hello, world!\r\ntest\r\ncontent\r\nmore content\r\ncontent inside div\r\n'},
+ {[[
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <title>title</title>
+ <link rel="stylesheet" href="style.css">
+ <script src="script.js"></script>
+ </head>
+ <body>
+ <!-- tabular content -->
+ <table>
+ content
+ </table>
+ <table>
+ <tr>
+ <th>heada</th>
+ <th>headb</th>
+ </tr>
+ <tr>
+ <td>data1</td>
+ <td>data2</td>
+ </tr>
+ </table>
+
+ </body>
+</html>
+ ]], 'content heada headb\r\ndata1 data2\r\n'},
+ {[[
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <title>title</title>
+ <link rel="stylesheet" href="style.css">
+ <script src="script.js"></script>
+ </head>
+ <body>
+ <!-- escape content -->
+ a&nbsp;b a &gt; b a &lt; b a &amp; b &apos;a &quot;a&quot;
+ </body>
+</html>
+ ]], 'a b a > b a < b a & b \'a "a"'},
}
for _,c in ipairs(cases) do