about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/libserver/html.c 29
-rw-r--r-- test/lua/unit/html.lua 11
2 files changed, 30 insertions, 10 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 914d21feb..a6e136481 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1031,7 +1031,7 @@ add_html_node (struct rspamd_task *task,
static gboolean
rspamd_html_process_tag (rspamd_mempool_t *pool, struct html_content *hc,
- struct html_tag *tag, GNode **cur_level)
+ struct html_tag *tag, GNode **cur_level, gboolean *balanced)
{
GNode *nnode;
@@ -1046,17 +1046,29 @@ rspamd_html_process_tag (rspamd_mempool_t *pool, struct html_content *hc,
nnode = g_node_new (tag);
+ if (tag->params) {
+ rspamd_mempool_add_destructor (pool,
+ (rspamd_mempool_destruct_t) g_list_free,
+ tag->params);
+ }
+
if (tag->flags & FL_CLOSING) {
if (!*cur_level) {
debug_task ("bad parent node");
+ g_node_destroy (nnode);
return FALSE;
}
+
g_node_append (*cur_level, nnode);
if (!rspamd_html_check_balance (nnode, cur_level)) {
debug_task (
"mark part as unbalanced as it has not pairable closing tags");
hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED;
+ *balanced = FALSE;
+ }
+ else {
+ *balanced = TRUE;
}
}
else {
@@ -1386,7 +1398,7 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
{
const guchar *p, *c, *end, *tag_start = NULL, *savep = NULL;
guchar t;
- gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE;
+ gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE, balanced;
GByteArray *dest;
guint obrace = 0, ebrace = 0;
GNode *cur_level = NULL;
@@ -1678,18 +1690,29 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
savep = NULL;
if (cur_tag != NULL) {
- if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level)) {
+ balanced = TRUE;
+
+ if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level,
+ &balanced)) {
state = content_write;
need_decode = FALSE;
}
else {
state = content_ignore;
}
+
+ if ((cur_tag->id == Tag_P || cur_tag->id == Tag_BR ||
+ cur_tag->id == Tag_HR) && balanced) {
+ /* Insert newline */
+ g_byte_array_append (dest, "\r\n", 2);
+ save_space = FALSE;
+ }
}
else {
state = content_write;
}
+
p++;
c = p;
cur_tag = NULL;
diff --git a/test/lua/unit/html.lua b/test/lua/unit/html.lua
index 22a03f6d6..f29d4eb3b 100644
--- a/test/lua/unit/html.lua
+++ b/test/lua/unit/html.lua
@@ -21,7 +21,7 @@ context("HTML processing", function()
<b>stuff</p>?
</body>
</html>
- ]], 'Hello, world! test data stuff?'},
+ ]], "Hello, world! test\r\ndata\r\nstuff?"},
{[[
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html
@@ -39,7 +39,7 @@ context("HTML processing", function()
</p>
</body>
- </html>]], 'Hello, world!'},
+ </html>]], '\r\nHello, world!\r\n'},
{[[
<!DOCTYPE html>
<html lang="en">
@@ -53,13 +53,10 @@ context("HTML processing", function()
--></head>
<body>
<!-- page content -->
- Hello, world! <b>test</b>
- <p>data<>
- </P>
- <b>stuff</p>?
+ Hello, world!
</body>
</html>
- ]], 'Hello, world! test data stuff?'},
+ ]], 'Hello, world!'},
}
for _,c in ipairs(cases) do