about summary refs log tree commit diff stats
path: root/src/html.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-01-24 20:45:54 +0300
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-01-24 20:45:54 +0300
commit 76b69f300d8372969b6143e3e269376229d03edf (patch)
tree d9c4dc4bfed5635869f2c9d83e9ebb94d00903a1 /src/html.c
parent b0d0a4ce50733ce162ce9738da2d416497f98763 (diff)
download rspamd-76b69f300d8372969b6143e3e269376229d03edf.tar.gz
rspamd-76b69f300d8372969b6143e3e269376229d03edf.zip
* Many fixes to fuzzy hashes logic and tokenization.
Diffstat (limited to 'src/html.c')
-rw-r--r-- src/html.c 8
1 files changed, 7 insertions, 1 deletions
diff --git a/src/html.c b/src/html.c
index 42ed9dbfa..64ebe362e 100644
--- a/src/html.c
+++ b/src/html.c
@@ -839,12 +839,13 @@ add_html_node (struct worker_task *task, memory_pool_t * pool, struct mime_text_
new = construct_html_node (pool, tag_text, tag_len);
if (new == NULL) {
debug_task ("cannot construct HTML node for text '%s'", tag_text);
- return -1;
+ return FALSE;
}
data = new->data;
if (data->tag && (data->tag->id == Tag_A || data->tag->id == Tag_IMG) && ((data->flags & FL_CLOSING) == 0)) {
parse_tag_url (task, part, data->tag->id, tag_text, tag_len);
}
+
if (data->flags & FL_CLOSING) {
if (!*cur_level) {
debug_task ("bad parent node");
@@ -857,10 +858,15 @@ add_html_node (struct worker_task *task, memory_pool_t * pool, struct mime_text_
}
}
else {
+
g_node_append (*cur_level, new);
if ((data->flags & FL_CLOSED) == 0) {
*cur_level = new;
}
+ /* Skip some tags */
+ if (data->tag->id == Tag_STYLE || data->tag->id == Tag_SCRIPT || data->tag->id == Tag_OBJECT) {
+ return FALSE;
+ }
}
}