source.dussan.org Git - rspamd.git/commitdiff
[Fix] Fix urls/emails distinguishing found in queries
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 16 Feb 2018 10:17:32 +0000 (10:17 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 16 Feb 2018 10:17:32 +0000 (10:17 +0000)
MFH: rspamd-1.6

src/libserver/html.c
src/libserver/url.c

index c21871a932511ff839f4820456ddada0d22b45a1..53c16708b361c161016afec625a2056269120d86 100644 (file)
@@ -1614,8 +1614,9 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag)
 
 static void
 rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
-               GHashTable *target)
+               GHashTable *tbl_urls, GHashTable *tbl_emails)
 {
+       GHashTable *target_tbl;
        struct rspamd_url *query_url, *existing;
        gchar *url_str;
        gint rc;
@@ -1633,13 +1634,20 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
                                        pool);
 
                        if (rc == URI_ERRNO_OK &&
-                                       url->hostlen > 0) {
+                                       query_url->hostlen > 0) {
                                msg_debug_html ("found url %s in query of url"
                                                " %*s", url_str, url->querylen, url->query);
 
-                               if ((existing = g_hash_table_lookup (target,
+                               if (query_url->protocol == PROTOCOL_MAILTO) {
+                                       target_tbl = tbl_emails;
+                               }
+                               else {
+                                       target_tbl = tbl_urls;
+                               }
+
+                               if ((existing = g_hash_table_lookup (target_tbl,
                                                query_url)) == NULL) {
-                                       g_hash_table_insert (target,
+                                       g_hash_table_insert (target_tbl,
                                                        query_url,
                                                        query_url);
                                }
@@ -2558,7 +2566,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
                                                                if (turl == NULL && url != NULL) {
                                                                        rspamd_process_html_url (pool,
                                                                                        url,
-                                                                                       target_tbl);
+                                                                                       urls, emails);
                                                                }
                                                        }
 
index 83ffd36d991f4f27902e055331a2069f4192a45d..272511a1caa05ca8d07aa5a784e8f74ebc5f1c4a 100644 (file)
@@ -2446,6 +2446,7 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
        struct rspamd_task *task;
        gchar *url_str = NULL;
        struct rspamd_url *query_url, *existing;
+       GHashTable *target_tbl = NULL;
        gint rc;
 
        task = cbd->task;
@@ -2457,26 +2458,25 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
 
        if (url->protocol == PROTOCOL_MAILTO) {
                if (url->userlen > 0) {
-                       if ((existing = g_hash_table_lookup (task->emails, url)) == NULL) {
-                               url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
-                               g_hash_table_insert (task->emails, url,
-                                               url);
-                       }
-                       else {
-                               existing->count ++;
-                       }
+                       target_tbl = task->emails;
                }
        }
        else {
-               if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) {
+               target_tbl = task->urls;
+       }
+
+       if (target_tbl) {
+               if ((existing = g_hash_table_lookup (target_tbl, url)) == NULL) {
                        url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
-                       g_hash_table_insert (task->urls, url, url);
+                       g_hash_table_insert (target_tbl, url, url);
                }
                else {
-                       existing->count ++;
+                       existing->count++;
                }
        }
 
+       target_tbl = NULL;
+
        cbd->part->exceptions = g_list_prepend (
                        cbd->part->exceptions,
                        ex);
@@ -2494,19 +2494,27 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
                                        task->task_pool);
 
                        if (rc == URI_ERRNO_OK &&
-                                       url->hostlen > 0) {
+                                       query_url->hostlen > 0) {
                                msg_debug_task ("found url %s in query of url"
                                                " %*s", url_str, url->querylen, url->query);
 
-                               if ((existing = g_hash_table_lookup (task->urls,
-                                               query_url)) == NULL) {
-                                       query_url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
-                                       g_hash_table_insert (task->urls,
-                                                       query_url,
-                                                       query_url);
+                               if (query_url->protocol == PROTOCOL_MAILTO) {
+                                       if (query_url->userlen > 0) {
+                                               target_tbl = task->emails;
+                                       }
                                }
                                else {
-                                       existing->count ++;
+                                       target_tbl = task->urls;
+                               }
+
+                               if (target_tbl) {
+                                       if ((existing = g_hash_table_lookup (target_tbl, query_url)) == NULL) {
+                                               url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
+                                               g_hash_table_insert (target_tbl, query_url, query_url);
+                                       }
+                                       else {
+                                               existing->count++;
+                                       }
                                }
                        }
                }