diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-01-24 20:45:54 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-01-24 20:45:54 +0300 |
commit | 76b69f300d8372969b6143e3e269376229d03edf (patch) | |
tree | d9c4dc4bfed5635869f2c9d83e9ebb94d00903a1 /src/html.c | |
parent | b0d0a4ce50733ce162ce9738da2d416497f98763 (diff) | |
download | rspamd-76b69f300d8372969b6143e3e269376229d03edf.tar.gz rspamd-76b69f300d8372969b6143e3e269376229d03edf.zip |
* Many fixes to fuzzy hashes logic and tokenization.
Diffstat (limited to 'src/html.c')
-rw-r--r-- | src/html.c | 8 |
1 files changed, 7 insertions, 1 deletions
diff --git a/src/html.c b/src/html.c
index 42ed9dbfa..64ebe362e 100644
--- a/src/html.c
+++ b/src/html.c
@@ -839,12 +839,13 @@ add_html_node (struct worker_task *task, memory_pool_t * pool, struct mime_text_
 		new = construct_html_node (pool, tag_text, tag_len);
 		if (new == NULL) {
 			debug_task ("cannot construct HTML node for text '%s'", tag_text);
-			return -1;
+			return FALSE;
 		}
 		data = new->data;
 		if (data->tag && (data->tag->id == Tag_A || data->tag->id == Tag_IMG) && ((data->flags & FL_CLOSING) == 0)) {
 			parse_tag_url (task, part, data->tag->id, tag_text, tag_len);
 		}
+
 		if (data->flags & FL_CLOSING) {
 			if (!*cur_level) {
 				debug_task ("bad parent node");
@@ -857,10 +858,15 @@ add_html_node (struct worker_task *task, memory_pool_t * pool, struct mime_text_
 			}
 		}
 		else {
+
 			g_node_append (*cur_level, new);
 			if ((data->flags & FL_CLOSED) == 0) {
 				*cur_level = new;
 			}
+			/* Skip some tags */
+			if (data->tag->id == Tag_STYLE || data->tag->id == Tag_SCRIPT || data->tag->id == Tag_OBJECT) {
+				return FALSE;
+			}
 		}
 	}
 