aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2017-06-06 13:50:38 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2017-06-06 13:50:38 +0100
commitc11f9e68e6dc5891f9584a0a7e443153657b7737 (patch)
tree0827669411f16a75cfb8b66369f6a78243b5a4c6
parentba8e4d7a11106893fa518aa96f46ba7c22205080 (diff)
downloadrspamd-c11f9e68e6dc5891f9584a0a7e443153657b7737.tar.gz
rspamd-c11f9e68e6dc5891f9584a0a7e443153657b7737.zip
[Feature] Add count to url structure
-rw-r--r--src/libserver/html.c12
-rw-r--r--src/libserver/url.c39
-rw-r--r--src/libserver/url.h1
-rw-r--r--src/plugins/surbl.c7
4 files changed, 44 insertions, 15 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 40f8f9f64..186376567 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1578,7 +1578,7 @@ static void
rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
GHashTable *target)
{
- struct rspamd_url *query_url;
+ struct rspamd_url *query_url, *existing;
gchar *url_str;
gint rc;
@@ -1599,12 +1599,15 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
msg_debug_html ("found url %s in query of url"
" %*s", url_str, url->querylen, url->query);
- if (!g_hash_table_lookup (target,
- query_url)) {
+ if ((existing = g_hash_table_lookup (target,
+ query_url)) == NULL) {
g_hash_table_insert (target,
query_url,
query_url);
}
+ else {
+ existing->count ++;
+ }
}
}
}
@@ -2102,6 +2105,8 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool,
turl->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
turl->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT;
}
+
+ turl->count ++;
}
else {
g_hash_table_insert (target_tbl,
@@ -2504,6 +2509,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
g_hash_table_insert (target_tbl, url, url);
}
else {
+ turl->count ++;
url = NULL;
}
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 9f377edb9..8e0cb52ee 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1491,6 +1491,7 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len,
memset (uri, 0, sizeof (*uri));
memset (&u, 0, sizeof (u));
+ uri->count = 1;
if (*uristring == '\0') {
return URI_ERRNO_EMPTY;
@@ -2350,7 +2351,7 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
struct rspamd_process_exception *ex;
struct rspamd_task *task;
gchar *url_str = NULL;
- struct rspamd_url *query_url;
+ struct rspamd_url *query_url, *existing;
gint rc;
task = cbd->task;
@@ -2362,18 +2363,24 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
if (url->protocol == PROTOCOL_MAILTO) {
if (url->userlen > 0) {
- if (!g_hash_table_lookup (task->emails, url)) {
+ if ((existing = g_hash_table_lookup (task->emails, url)) == NULL) {
url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
g_hash_table_insert (task->emails, url,
url);
}
+ else {
+ existing->count ++;
+ }
}
}
else {
- if (!g_hash_table_lookup (task->urls, url)) {
+ if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) {
url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
g_hash_table_insert (task->urls, url, url);
}
+ else {
+ existing->count ++;
+ }
}
cbd->part->exceptions = g_list_prepend (
@@ -2397,13 +2404,16 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
msg_debug_task ("found url %s in query of url"
" %*s", url_str, url->querylen, url->query);
- if (!g_hash_table_lookup (task->urls,
- query_url)) {
+ if ((existing = g_hash_table_lookup (task->urls,
+ query_url)) == NULL) {
query_url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
g_hash_table_insert (task->urls,
query_url,
query_url);
}
+ else {
+ existing->count ++;
+ }
}
}
}
@@ -2492,7 +2502,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
{
struct rspamd_task *task = ud;
gchar *url_str = NULL;
- struct rspamd_url *query_url;
+ struct rspamd_url *query_url, *existing;
gint rc;
/* It is just a displayed URL, we should not check it for certain things */
@@ -2500,16 +2510,22 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
if (url->protocol == PROTOCOL_MAILTO) {
if (url->userlen > 0) {
- if (!g_hash_table_lookup (task->emails, url)) {
+ if ((existing = g_hash_table_lookup (task->emails, url)) == NULL) {
g_hash_table_insert (task->emails, url,
url);
}
+ else {
+ existing->count ++;
+ }
}
}
else {
- if (!g_hash_table_lookup (task->urls, url)) {
+ if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) {
g_hash_table_insert (task->urls, url, url);
}
+ else {
+ existing->count ++;
+ }
}
/* We also search the query for additional url inside */
@@ -2529,12 +2545,15 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
msg_debug_task ("found url %s in query of url"
" %*s", url_str, url->querylen, url->query);
- if (!g_hash_table_lookup (task->urls,
- query_url)) {
+ if ((existing = g_hash_table_lookup (task->urls,
+ query_url)) == NULL) {
g_hash_table_insert (task->urls,
query_url,
query_url);
}
+ else {
+ existing->count ++;
+ }
}
}
}
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 14c0c5b69..e4834d9bc 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -50,6 +50,7 @@ struct rspamd_url {
guint urllen;
enum rspamd_url_flags flags;
+ guint count;
GHashTable *tags;
};
diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c
index 9fef4d128..c84cfdc88 100644
--- a/src/plugins/surbl.c
+++ b/src/plugins/surbl.c
@@ -1354,7 +1354,7 @@ surbl_redirector_finish (struct rspamd_http_connection *conn,
struct redirector_param *param = (struct redirector_param *)conn->ud;
struct rspamd_task *task;
gint r, urllen;
- struct rspamd_url *redirected_url;
+ struct rspamd_url *redirected_url, *existing;
const rspamd_ftok_t *hdr;
gchar *urlstr;
@@ -1378,12 +1378,15 @@ surbl_redirector_finish (struct rspamd_http_connection *conn,
task->task_pool);
if (r == URI_ERRNO_OK) {
- if (!g_hash_table_lookup (task->urls, redirected_url)) {
+ if ((existing = g_hash_table_lookup (task->urls, redirected_url)) == NULL) {
g_hash_table_insert (task->urls, redirected_url,
redirected_url);
redirected_url->phished_url = param->url;
redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
}
+ else {
+ existing->count ++;
+ }
rspamd_url_add_tag (param->url, "redirector", urlstr,
task->task_pool);