diff options
author | Pragadeesh C <cpragadeesh@gmail.com> | 2017-04-14 06:32:37 -0700 |
---|---|---|
committer | Pragadeesh C <cpragadeesh@gmail.com> | 2017-04-14 06:32:37 -0700 |
commit | a69b112bf749cdec95962ea8835bd62e4cfd5db2 (patch) | |
tree | 8807b158e87e115e7c2691fa14974f5ceec4ef82 /test/lua | |
parent | 30324e7c53dff01bc477b592a24a1f8cc0037406 (diff) | |
download | rspamd-a69b112bf749cdec95962ea8835bd62e4cfd5db2.tar.gz rspamd-a69b112bf749cdec95962ea8835bd62e4cfd5db2.zip |
html content extraction test improv
Diffstat (limited to 'test/lua')
-rw-r--r-- | test/lua/unit/html.lua | 58 |
1 file changed, 58 insertions, 0 deletions
```diff
diff --git a/test/lua/unit/html.lua b/test/lua/unit/html.lua
index f1408c25a..489947ffb 100644
--- a/test/lua/unit/html.lua
+++ b/test/lua/unit/html.lua
@@ -57,6 +57,64 @@ context("HTML processing", function()
   </body>
 </html>
 ]], 'Hello, world!'},
+    {[[
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>title</title>
+    <link rel="stylesheet" href="style.css">
+    <script src="script.js"></script>
+  </head>
+  <body>
+    <!-- page content -->
+    Hello, world!<br>test</br><br>content</hr>more content<br>
+    <div>
+      content inside div
+    </div>
+  </body>
+</html>
+    ]], 'Hello, world!\r\ntest\r\ncontent\r\nmore content\r\ncontent inside div\r\n'},
+    {[[
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>title</title>
+    <link rel="stylesheet" href="style.css">
+    <script src="script.js"></script>
+  </head>
+  <body>
+    <!-- tabular content -->
+    <table>
+      content
+    </table>
+    <table>
+      <tr>
+        <th>heada</th>
+        <th>headb</th>
+      </tr>
+      <tr>
+        <td>data1</td>
+        <td>data2</td>
+      </tr>
+    </table>
+
+  </body>
+</html>
+    ]], 'content heada headb\r\ndata1 data2\r\n'},
+    {[[
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>title</title>
+    <link rel="stylesheet" href="style.css">
+    <script src="script.js"></script>
+  </head>
+  <body>
+    <!-- escape content -->
+    a b a > b a < b a & b 'a "a"
+  </body>
+</html>
+    ]], 'a b a > b a < b a & b \'a "a"'},
 }

 for _,c in ipairs(cases) do
```