diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-06-23 19:05:58 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-06-23 19:05:58 +0400 |
commit | 5022c0333ffd8ce5eca3dc9e2679b612e2c9ce99 (patch) | |
tree | 4b7fd05c9f01700ea372fb941b87312fceece513 /src/html.c | |
parent | de94e18f57a0dccbab76efb3d574c0485a9e3700 (diff) | |
download | rspamd-5022c0333ffd8ce5eca3dc9e2679b612e2c9ce99.tar.gz rspamd-5022c0333ffd8ce5eca3dc9e2679b612e2c9ce99.zip |
* Fixes to fuzzy hashing logic, skip urls while estimating fuzzy hash
Fix tags stripping.
Fix phishing checks (ignore img tags).
Diffstat (limited to 'src/html.c')
-rw-r--r-- | src/html.c | 12 |
1 files changed, 5 insertions, 7 deletions
```diff
diff --git a/src/html.c b/src/html.c
index e686570a0..3582022f8 100644
--- a/src/html.c
+++ b/src/html.c
@@ -687,7 +687,7 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url
 	gchar tagbuf[128];
 	struct html_tag *tag;
 	gsize len = 0;
-	gint off, rc;
+	gint rc;
 
 	p = url_text;
 	while (len < remain) {
@@ -719,7 +719,7 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url
 		p ++;
 	}
 
-	if (url_try_text (task->task_pool, url_text, len, &off, &url_str) && url_str != NULL) {
+	if (url_try_text (task->task_pool, url_text, len, NULL, NULL, &url_str) && url_str != NULL) {
 		new = memory_pool_alloc0 (task->task_pool, sizeof (struct uri));
 		if (new != NULL) {
 			g_strstrip (url_str);
@@ -864,13 +864,10 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
 		/*
 		 * Check for phishing
 		 */
-		if ((p = strchr (c, '>')) != NULL ) {
+		if ((p = strchr (c, '>')) != NULL && id == Tag_A) {
 			p ++;
 			check_phishing (task, url, p, remain - (p - tag_text), id);
 		}
-		if (part->html_urls && g_tree_lookup (part->html_urls, url_text) == NULL) {
-			g_tree_insert (part->html_urls, url_text, url);
-		}
 		if (g_tree_lookup (task->urls, url) == NULL) {
 			g_tree_insert (task->urls, url, url);
 		}
@@ -938,7 +935,8 @@ add_html_node (struct worker_task *task, memory_pool_t * pool, struct mime_text_
 			/* Skip some tags */
 			if (data->tag && (data->tag->id == Tag_STYLE ||
 					data->tag->id == Tag_SCRIPT ||
-					data->tag->id == Tag_OBJECT)) {
+					data->tag->id == Tag_OBJECT ||
+					data->tag->id == Tag_TITLE)) {
 				return FALSE;
 			}
 		}
```