diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-09-23 17:08:32 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-09-23 17:08:32 +0100 |
commit | 471e72e7a1290918c759a5576bcb8883122d460d (patch) | |
tree | af7d262fcac606a4ff3dfa4e60eeb8d9986373ec /src/libserver | |
parent | bb856bcce83358f7f3b0fd32afe43a6c6c5eae5e (diff) | |
download | rspamd-471e72e7a1290918c759a5576bcb8883122d460d.tar.gz rspamd-471e72e7a1290918c759a5576bcb8883122d460d.zip |
Search for urls in query parts of html urls as well.
Diffstat (limited to 'src/libserver')
-rw-r--r-- | src/libserver/html.c | 43 |
1 files changed, 43 insertions, 0 deletions
```diff
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 0a46cef02..520c49d39 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1308,6 +1308,43 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag)
 }
 
 static void
+rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
+		GHashTable *target)
+{
+	gint nstate = 0;
+	struct rspamd_url *query_url;
+	gchar *url_str;
+	gint rc;
+
+	if (url->querylen > 0) {
+
+		if (rspamd_url_find (pool, url->query, url->querylen, NULL, NULL,
+				&url_str, TRUE, &nstate)) {
+			query_url = rspamd_mempool_alloc0 (pool,
+					sizeof (struct rspamd_url));
+
+			rc = rspamd_url_parse (query_url,
+					url_str,
+					strlen (url_str),
+					pool);
+
+			if (rc == URI_ERRNO_OK &&
+					url->hostlen > 0) {
+				msg_debug_pool ("found url %s in query of url"
+						" %*s", url_str, url->querylen, url->query);
+
+				if (!g_hash_table_lookup (target,
+						query_url)) {
+					g_hash_table_insert (target,
+							query_url,
+							query_url);
+				}
+			}
+		}
+	}
+}
+
+static void
 rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
 		struct html_content *hc)
 {
@@ -1923,6 +1960,12 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
 					else {
 						url = NULL;
 					}
+
+					if (turl == NULL && url != NULL) {
+						rspamd_process_html_url (pool,
+								url,
+								target_tbl);
+					}
 				}
 
 				href_offset = dest->len;
```