From c11f9e68e6dc5891f9584a0a7e443153657b7737 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 6 Jun 2017 13:50:38 +0100 Subject: [PATCH] [Feature] Add count to url structure --- src/libserver/html.c | 12 +++++++++--- src/libserver/url.c | 39 +++++++++++++++++++++++++++++---------- src/libserver/url.h | 1 + src/plugins/surbl.c | 7 +++++-- 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/libserver/html.c b/src/libserver/html.c index 40f8f9f64..186376567 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -1578,7 +1578,7 @@ static void rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url, GHashTable *target) { - struct rspamd_url *query_url; + struct rspamd_url *query_url, *existing; gchar *url_str; gint rc; @@ -1599,12 +1599,15 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url, msg_debug_html ("found url %s in query of url" " %*s", url_str, url->querylen, url->query); - if (!g_hash_table_lookup (target, - query_url)) { + if ((existing = g_hash_table_lookup (target, + query_url)) == NULL) { g_hash_table_insert (target, query_url, query_url); } + else { + existing->count ++; + } } } } @@ -2102,6 +2105,8 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool, turl->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED; turl->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT; } + + turl->count ++; } else { g_hash_table_insert (target_tbl, @@ -2504,6 +2509,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, g_hash_table_insert (target_tbl, url, url); } else { + turl->count ++; url = NULL; } diff --git a/src/libserver/url.c b/src/libserver/url.c index 9f377edb9..8e0cb52ee 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1491,6 +1491,7 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len, memset (uri, 0, sizeof (*uri)); memset (&u, 0, sizeof (u)); + uri->count = 1; if (*uristring == '\0') { return URI_ERRNO_EMPTY; @@ -2350,7 +2351,7 @@ 
rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset, struct rspamd_process_exception *ex; struct rspamd_task *task; gchar *url_str = NULL; - struct rspamd_url *query_url; + struct rspamd_url *query_url, *existing; gint rc; task = cbd->task; @@ -2362,18 +2363,24 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset, if (url->protocol == PROTOCOL_MAILTO) { if (url->userlen > 0) { - if (!g_hash_table_lookup (task->emails, url)) { + if ((existing = g_hash_table_lookup (task->emails, url)) == NULL) { url->flags |= RSPAMD_URL_FLAG_FROM_TEXT; g_hash_table_insert (task->emails, url, url); } + else { + existing->count ++; + } } } else { - if (!g_hash_table_lookup (task->urls, url)) { + if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) { url->flags |= RSPAMD_URL_FLAG_FROM_TEXT; g_hash_table_insert (task->urls, url, url); } + else { + existing->count ++; + } } cbd->part->exceptions = g_list_prepend ( @@ -2397,13 +2404,16 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset, msg_debug_task ("found url %s in query of url" " %*s", url_str, url->querylen, url->query); - if (!g_hash_table_lookup (task->urls, - query_url)) { + if ((existing = g_hash_table_lookup (task->urls, + query_url)) == NULL) { query_url->flags |= RSPAMD_URL_FLAG_FROM_TEXT; g_hash_table_insert (task->urls, query_url, query_url); } + else { + existing->count ++; + } } } } @@ -2492,7 +2502,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset, { struct rspamd_task *task = ud; gchar *url_str = NULL; - struct rspamd_url *query_url; + struct rspamd_url *query_url, *existing; gint rc; /* It is just a displayed URL, we should not check it for certain things */ @@ -2500,16 +2510,22 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset, if (url->protocol == PROTOCOL_MAILTO) { if (url->userlen > 0) { - if (!g_hash_table_lookup (task->emails, url)) { + if ((existing = 
g_hash_table_lookup (task->emails, url)) == NULL) { g_hash_table_insert (task->emails, url, url); } + else { + existing->count ++; + } } } else { - if (!g_hash_table_lookup (task->urls, url)) { + if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) { g_hash_table_insert (task->urls, url, url); } + else { + existing->count ++; + } } /* We also search the query for additional url inside */ @@ -2529,12 +2545,15 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset, msg_debug_task ("found url %s in query of url" " %*s", url_str, url->querylen, url->query); - if (!g_hash_table_lookup (task->urls, - query_url)) { + if ((existing = g_hash_table_lookup (task->urls, + query_url)) == NULL) { g_hash_table_insert (task->urls, query_url, query_url); } + else { + existing->count ++; /* already in task->urls: count one more occurrence */ + } } } } diff --git a/src/libserver/url.h b/src/libserver/url.h index 14c0c5b69..e4834d9bc 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -50,6 +50,7 @@ struct rspamd_url { guint urllen; enum rspamd_url_flags flags; + guint count; GHashTable *tags; }; diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 9fef4d128..c84cfdc88 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -1354,7 +1354,7 @@ surbl_redirector_finish (struct rspamd_http_connection *conn, struct redirector_param *param = (struct redirector_param *)conn->ud; struct rspamd_task *task; gint r, urllen; - struct rspamd_url *redirected_url; + struct rspamd_url *redirected_url, *existing; const rspamd_ftok_t *hdr; gchar *urlstr; @@ -1378,12 +1378,15 @@ surbl_redirector_finish (struct rspamd_http_connection *conn, task->task_pool); if (r == URI_ERRNO_OK) { - if (!g_hash_table_lookup (task->urls, redirected_url)) { + if ((existing = g_hash_table_lookup (task->urls, redirected_url)) == NULL) { g_hash_table_insert (task->urls, redirected_url, redirected_url); redirected_url->phished_url = param->url; redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED; } + else { + existing->count ++; /* already in task->urls: count one more occurrence */ + } 
rspamd_url_add_tag (param->url, "redirector", urlstr, task->task_pool); -- 2.39.5