Browse Source

[Fix] Fix distinguishing between urls and emails found in queries

MFH: rspamd-1.6
tags/1.7.0
Vsevolod Stakhov 6 years ago
parent
commit
fc5dc785c4
2 changed files with 40 additions and 24 deletions
  1. 13
    5
      src/libserver/html.c
  2. 27
    19
      src/libserver/url.c

+ 13
- 5
src/libserver/html.c View File

@@ -1614,8 +1614,9 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag)

static void
rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
GHashTable *target)
GHashTable *tbl_urls, GHashTable *tbl_emails)
{
GHashTable *target_tbl;
struct rspamd_url *query_url, *existing;
gchar *url_str;
gint rc;
@@ -1633,13 +1634,20 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
pool);

if (rc == URI_ERRNO_OK &&
url->hostlen > 0) {
query_url->hostlen > 0) {
msg_debug_html ("found url %s in query of url"
" %*s", url_str, url->querylen, url->query);

if ((existing = g_hash_table_lookup (target,
if (query_url->protocol == PROTOCOL_MAILTO) {
target_tbl = tbl_emails;
}
else {
target_tbl = tbl_urls;
}

if ((existing = g_hash_table_lookup (target_tbl,
query_url)) == NULL) {
g_hash_table_insert (target,
g_hash_table_insert (target_tbl,
query_url,
query_url);
}
@@ -2558,7 +2566,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
if (turl == NULL && url != NULL) {
rspamd_process_html_url (pool,
url,
target_tbl);
urls, emails);
}
}


+ 27
- 19
src/libserver/url.c View File

@@ -2446,6 +2446,7 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
struct rspamd_task *task;
gchar *url_str = NULL;
struct rspamd_url *query_url, *existing;
GHashTable *target_tbl = NULL;
gint rc;

task = cbd->task;
@@ -2457,26 +2458,25 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,

if (url->protocol == PROTOCOL_MAILTO) {
if (url->userlen > 0) {
if ((existing = g_hash_table_lookup (task->emails, url)) == NULL) {
url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
g_hash_table_insert (task->emails, url,
url);
}
else {
existing->count ++;
}
target_tbl = task->emails;
}
}
else {
if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) {
target_tbl = task->urls;
}

if (target_tbl) {
if ((existing = g_hash_table_lookup (target_tbl, url)) == NULL) {
url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
g_hash_table_insert (task->urls, url, url);
g_hash_table_insert (target_tbl, url, url);
}
else {
existing->count ++;
existing->count++;
}
}

target_tbl = NULL;

cbd->part->exceptions = g_list_prepend (
cbd->part->exceptions,
ex);
@@ -2494,19 +2494,27 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
task->task_pool);

if (rc == URI_ERRNO_OK &&
url->hostlen > 0) {
query_url->hostlen > 0) {
msg_debug_task ("found url %s in query of url"
" %*s", url_str, url->querylen, url->query);

if ((existing = g_hash_table_lookup (task->urls,
query_url)) == NULL) {
query_url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
g_hash_table_insert (task->urls,
query_url,
query_url);
if (query_url->protocol == PROTOCOL_MAILTO) {
if (query_url->userlen > 0) {
target_tbl = task->emails;
}
}
else {
existing->count ++;
target_tbl = task->urls;
}

if (target_tbl) {
if ((existing = g_hash_table_lookup (target_tbl, query_url)) == NULL) {
url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
g_hash_table_insert (target_tbl, query_url, query_url);
}
else {
existing->count++;
}
}
}
}

Loading…
Cancel
Save