From 471e72e7a1290918c759a5576bcb8883122d460d Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Wed, 23 Sep 2015 17:08:32 +0100
Subject: [PATCH] Search for urls in query parts of html urls as well.

---
 src/libserver/html.c | 56 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/src/libserver/html.c b/src/libserver/html.c
index 0a46cef02..520c49d39 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1307,6 +1307,56 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag)
 	return NULL;
 }
 
+/*
+ * Look for a url embedded in the query part of an HTML url.
+ *
+ * When url->query is non-empty, rspamd_url_find () is run over it once. A
+ * match is parsed into a new rspamd_url allocated from pool; if parsing
+ * succeeds and the parsed url has a non-empty host, it is inserted into
+ * target as both key and value, unless the lookup already finds that key.
+ *
+ * @param pool memory pool used for the new url and passed to the url helpers
+ * @param url url whose query part is inspected
+ * @param target hash table that receives the url found in the query, if any
+ */
+static void
+rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
+		GHashTable *target)
+{
+	gint nstate = 0;
+	struct rspamd_url *query_url;
+	gchar *url_str;
+	gint rc;
+
+	if (url->querylen > 0) {
+
+		if (rspamd_url_find (pool, url->query, url->querylen, NULL, NULL,
+				&url_str, TRUE, &nstate)) {
+			query_url = rspamd_mempool_alloc0 (pool,
+					sizeof (struct rspamd_url));
+
+			rc = rspamd_url_parse (query_url,
+					url_str,
+					strlen (url_str),
+					pool);
+
+			/* The host that matters is the one of the url found in the query */
+			if (rc == URI_ERRNO_OK &&
+					query_url->hostlen > 0) {
+				msg_debug_pool ("found url %s in query of url"
+						" %*s", url_str, url->querylen, url->query);
+
+				if (!g_hash_table_lookup (target,
+						query_url)) {
+					g_hash_table_insert (target,
+							query_url,
+							query_url);
+				}
+			}
+		}
+	}
+}
+
 static void
 rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
 		struct html_content *hc)
@@ -1923,6 +1973,12 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
 					else {
 						url = NULL;
 					}
+
+					if (turl == NULL && url != NULL) {
+						rspamd_process_html_url (pool,
+								url,
+								target_tbl);
+					}
 				}
 
 				href_offset = dest->len;
-- 
2.39.5