Browse Source

Add logic to preserve newlines in HTML.

tags/1.0.0
Vsevolod Stakhov 9 years ago
parent
commit
80bce97605
2 changed files with 30 additions and 10 deletions
  1. src/libserver/html.c (+26, -3)
  2. test/lua/unit/html.lua (+4, -7)

+ 26
- 3
src/libserver/html.c View File



static gboolean static gboolean
rspamd_html_process_tag (rspamd_mempool_t *pool, struct html_content *hc, rspamd_html_process_tag (rspamd_mempool_t *pool, struct html_content *hc,
struct html_tag *tag, GNode **cur_level)
struct html_tag *tag, GNode **cur_level, gboolean *balanced)
{ {
GNode *nnode; GNode *nnode;




nnode = g_node_new (tag); nnode = g_node_new (tag);


if (tag->params) {
rspamd_mempool_add_destructor (pool,
(rspamd_mempool_destruct_t) g_list_free,
tag->params);
}

if (tag->flags & FL_CLOSING) { if (tag->flags & FL_CLOSING) {
if (!*cur_level) { if (!*cur_level) {
debug_task ("bad parent node"); debug_task ("bad parent node");
g_node_destroy (nnode);
return FALSE; return FALSE;
} }

g_node_append (*cur_level, nnode); g_node_append (*cur_level, nnode);


if (!rspamd_html_check_balance (nnode, cur_level)) { if (!rspamd_html_check_balance (nnode, cur_level)) {
debug_task ( debug_task (
"mark part as unbalanced as it has not pairable closing tags"); "mark part as unbalanced as it has not pairable closing tags");
hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED; hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED;
*balanced = FALSE;
}
else {
*balanced = TRUE;
} }
} }
else { else {
{ {
const guchar *p, *c, *end, *tag_start = NULL, *savep = NULL; const guchar *p, *c, *end, *tag_start = NULL, *savep = NULL;
guchar t; guchar t;
gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE;
gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE, balanced;
GByteArray *dest; GByteArray *dest;
guint obrace = 0, ebrace = 0; guint obrace = 0, ebrace = 0;
GNode *cur_level = NULL; GNode *cur_level = NULL;
savep = NULL; savep = NULL;


if (cur_tag != NULL) { if (cur_tag != NULL) {
if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level)) {
balanced = TRUE;

if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level,
&balanced)) {
state = content_write; state = content_write;
need_decode = FALSE; need_decode = FALSE;
} }
else { else {
state = content_ignore; state = content_ignore;
} }

if ((cur_tag->id == Tag_P || cur_tag->id == Tag_BR ||
cur_tag->id == Tag_HR) && balanced) {
/* Insert newline */
g_byte_array_append (dest, "\r\n", 2);
save_space = FALSE;
}
} }
else { else {
state = content_write; state = content_write;
} }



p++; p++;
c = p; c = p;
cur_tag = NULL; cur_tag = NULL;

+ 4
- 7
test/lua/unit/html.lua View File

<b>stuff</p>? <b>stuff</p>?
</body> </body>
</html> </html>
]], 'Hello, world! test data stuff?'},
]], "Hello, world! test\r\ndata\r\nstuff?"},
{[[ {[[
<?xml version="1.0" encoding="iso-8859-1"?> <?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html <!DOCTYPE html
</p> </p>
</body> </body>
</html>]], 'Hello, world!'},
</html>]], '\r\nHello, world!\r\n'},
{[[ {[[
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en"> <html lang="en">
--></head> --></head>
<body> <body>
<!-- page content --> <!-- page content -->
Hello, world! <b>test</b>
<p>data<>
</P>
<b>stuff</p>?
Hello, world!
</body> </body>
</html> </html>
]], 'Hello, world! test data stuff?'},
]], 'Hello, world!'},
} }
for _,c in ipairs(cases) do for _,c in ipairs(cases) do

Loading…
Cancel
Save