Browse Source

Search for URLs in the query parts of HTML URLs as well.

tags/1.0.2
Vsevolod Stakhov 8 years ago
parent
commit
471e72e7a1
1 changed files with 43 additions and 0 deletions
  1. 43
    0
      src/libserver/html.c

+ 43
- 0
src/libserver/html.c View File

@@ -1307,6 +1307,43 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag)
return NULL;
}

/**
 * Look for a URL embedded in the query part of an already parsed URL and,
 * when one is found and parses successfully, register it in `target`.
 *
 * Nothing is done when `url` has no query part, when no URL is found in the
 * query, or when the found string fails to parse or yields an empty host.
 *
 * @param pool memory pool used for the new URL structure and passed to the
 *             URL find/parse helpers
 * @param url parsed URL whose `query`/`querylen` fields are scanned
 * @param target hash table receiving the new URL; the URL structure is used
 *               as both key and value, and is only inserted when a lookup
 *               with it as the key finds nothing
 */
static void
rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
		GHashTable *target)
{
	gint nstate = 0;
	struct rspamd_url *query_url;
	gchar *url_str = NULL;
	gint rc;

	if (url->querylen > 0) {

		/*
		 * url_str is an out parameter of rspamd_url_find; it is initialised
		 * to NULL and re-checked here so strlen() below never sees an unset
		 * pointer.
		 */
		if (rspamd_url_find (pool, url->query, url->querylen, NULL, NULL,
				&url_str, TRUE, &nstate) && url_str != NULL) {
			query_url = rspamd_mempool_alloc0 (pool,
					sizeof (struct rspamd_url));

			rc = rspamd_url_parse (query_url,
					url_str,
					strlen (url_str),
					pool);

			/*
			 * Validate the host of the URL that has just been parsed
			 * (query_url), not the enclosing one: checking the outer url
			 * here would let a query URL with an empty host be inserted.
			 */
			if (rc == URI_ERRNO_OK &&
					query_url->hostlen > 0) {
				msg_debug_pool ("found url %s in query of url"
						" %*s", url_str, url->querylen, url->query);

				/* Do not replace an entry that is already registered */
				if (!g_hash_table_lookup (target,
						query_url)) {
					g_hash_table_insert (target,
							query_url,
							query_url);
				}
			}
		}
	}
}

static void
rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
struct html_content *hc)
@@ -1923,6 +1960,12 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
else {
url = NULL;
}

if (turl == NULL && url != NULL) {
rspamd_process_html_url (pool,
url,
target_tbl);
}
}

href_offset = dest->len;

Loading…
Cancel
Save