Browse Source

[Feature] Add count to url structure

tags/1.6.0
Vsevolod Stakhov 7 years ago
parent
commit
c11f9e68e6
4 changed files with 44 additions and 15 deletions
  1. 9
    3
      src/libserver/html.c
  2. 29
    10
      src/libserver/url.c
  3. 1
    0
      src/libserver/url.h
  4. 5
    2
      src/plugins/surbl.c

+ 9
- 3
src/libserver/html.c View File

@@ -1578,7 +1578,7 @@ static void
rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
GHashTable *target)
{
struct rspamd_url *query_url;
struct rspamd_url *query_url, *existing;
gchar *url_str;
gint rc;

@@ -1599,12 +1599,15 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
msg_debug_html ("found url %s in query of url"
" %*s", url_str, url->querylen, url->query);

if (!g_hash_table_lookup (target,
query_url)) {
if ((existing = g_hash_table_lookup (target,
query_url)) == NULL) {
g_hash_table_insert (target,
query_url,
query_url);
}
else {
existing->count ++;
}
}
}
}
@@ -2102,6 +2105,8 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool,
turl->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
turl->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT;
}

turl->count ++;
}
else {
g_hash_table_insert (target_tbl,
@@ -2504,6 +2509,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
g_hash_table_insert (target_tbl, url, url);
}
else {
turl->count ++;
url = NULL;
}


+ 29
- 10
src/libserver/url.c View File

@@ -1491,6 +1491,7 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len,

memset (uri, 0, sizeof (*uri));
memset (&u, 0, sizeof (u));
uri->count = 1;

if (*uristring == '\0') {
return URI_ERRNO_EMPTY;
@@ -2350,7 +2351,7 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
struct rspamd_process_exception *ex;
struct rspamd_task *task;
gchar *url_str = NULL;
struct rspamd_url *query_url;
struct rspamd_url *query_url, *existing;
gint rc;

task = cbd->task;
@@ -2362,18 +2363,24 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,

if (url->protocol == PROTOCOL_MAILTO) {
if (url->userlen > 0) {
if (!g_hash_table_lookup (task->emails, url)) {
if ((existing = g_hash_table_lookup (task->emails, url)) == NULL) {
url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
g_hash_table_insert (task->emails, url,
url);
}
else {
existing->count ++;
}
}
}
else {
if (!g_hash_table_lookup (task->urls, url)) {
if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) {
url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
g_hash_table_insert (task->urls, url, url);
}
else {
existing->count ++;
}
}

cbd->part->exceptions = g_list_prepend (
@@ -2397,13 +2404,16 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
msg_debug_task ("found url %s in query of url"
" %*s", url_str, url->querylen, url->query);

if (!g_hash_table_lookup (task->urls,
query_url)) {
if ((existing = g_hash_table_lookup (task->urls,
query_url)) == NULL) {
query_url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
g_hash_table_insert (task->urls,
query_url,
query_url);
}
else {
existing->count ++;
}
}
}
}
@@ -2492,7 +2502,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
{
struct rspamd_task *task = ud;
gchar *url_str = NULL;
struct rspamd_url *query_url;
struct rspamd_url *query_url, *existing;
gint rc;

/* It is just a displayed URL, we should not check it for certain things */
@@ -2500,16 +2510,22 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,

if (url->protocol == PROTOCOL_MAILTO) {
if (url->userlen > 0) {
if (!g_hash_table_lookup (task->emails, url)) {
if ((existing = g_hash_table_lookup (task->emails, url)) == NULL) {
g_hash_table_insert (task->emails, url,
url);
}
else {
existing->count ++;
}
}
}
else {
if (!g_hash_table_lookup (task->urls, url)) {
if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) {
g_hash_table_insert (task->urls, url, url);
}
else {
existing->count ++;
}
}

/* We also search the query for additional url inside */
@@ -2529,12 +2545,15 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
msg_debug_task ("found url %s in query of url"
" %*s", url_str, url->querylen, url->query);

if (!g_hash_table_lookup (task->urls,
query_url)) {
/*
 * Register a URL found inside the query string only the first time it is
 * seen; for repeats, bump the counter of the entry already stored in
 * task->urls. The lookup result must be compared against NULL: inserting
 * on a hit and dereferencing on a miss would be a NULL pointer dereference.
 */
if ((existing = g_hash_table_lookup (task->urls,
		query_url)) == NULL) {
	g_hash_table_insert (task->urls,
			query_url,
			query_url);
}
else {
	/* existing is non-NULL on this branch, so the increment is safe */
	existing->count ++;
}
}
}
}

+ 1
- 0
src/libserver/url.h View File

@@ -50,6 +50,7 @@ struct rspamd_url {
guint urllen;

enum rspamd_url_flags flags;
guint count;
GHashTable *tags;
};


+ 5
- 2
src/plugins/surbl.c View File

@@ -1354,7 +1354,7 @@ surbl_redirector_finish (struct rspamd_http_connection *conn,
struct redirector_param *param = (struct redirector_param *)conn->ud;
struct rspamd_task *task;
gint r, urllen;
struct rspamd_url *redirected_url;
struct rspamd_url *redirected_url, *existing;
const rspamd_ftok_t *hdr;
gchar *urlstr;

@@ -1378,12 +1378,15 @@ surbl_redirector_finish (struct rspamd_http_connection *conn,
task->task_pool);

if (r == URI_ERRNO_OK) {
if (!g_hash_table_lookup (task->urls, redirected_url)) {
/*
 * Add the resolved redirect target to task->urls only when it is not
 * there yet, linking it to the URL that redirected to it and marking it
 * as redirected. If the target is already known, just bump its counter.
 * The lookup result must be compared against NULL: the else branch
 * dereferences existing, which is only valid on a hit.
 */
if ((existing = g_hash_table_lookup (task->urls, redirected_url)) == NULL) {
	g_hash_table_insert (task->urls, redirected_url,
			redirected_url);
	redirected_url->phished_url = param->url;
	redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
}
else {
	/* existing is non-NULL on this branch, so the increment is safe */
	existing->count ++;
}

rspamd_url_add_tag (param->url, "redirector", urlstr,
task->task_pool);

Loading…
Cancel
Save