source.dussan.org Git - rspamd.git/commitdiff
[Regression] Fix urls output in the protocol
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 4 Feb 2019 16:35:21 +0000 (16:35 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 4 Feb 2019 16:35:21 +0000 (16:35 +0000)
src/libserver/protocol.c
src/libserver/url.c
src/libserver/url.h

index ba468ee5f22722f547acc3ab04289f8b876c1756..5bcfbc37a1183a7cbd1fc2c2ae8d3ddf63a8016f 100644 (file)
@@ -435,14 +435,17 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
                                        }
                                }
                                IF_HEADER (URLS_HEADER) {
+                                       msg_debug_protocol ("read urls header, value: %V", hv);
+
                                        srch.begin = "extended";
                                        srch.len = 8;
 
-                                       msg_debug_protocol ("read urls header, value: %V", hv);
                                        if (rspamd_ftok_casecmp (hv_tok, &srch) == 0) {
                                                task->flags |= RSPAMD_TASK_FLAG_EXT_URLS;
                                                msg_debug_protocol ("extended urls information");
                                        }
+
+                                       /* TODO: add more formats here */
                                }
                                IF_HEADER (USER_AGENT_HEADER) {
                                        msg_debug_protocol ("read user-agent header, value: %V", hv);
@@ -665,6 +668,7 @@ rspamd_protocol_handle_request (struct rspamd_task *task,
 /* Structure for writing tree data */
 struct tree_cb_data {
        ucl_object_t *top;
+       GHashTable *seen;
        struct rspamd_task *task;
 };
 
@@ -715,17 +719,37 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
        struct rspamd_url *url = value;
        ucl_object_t *obj;
        struct rspamd_task *task = cb->task;
-       const gchar *user_field = "unknown", *encoded;
+       const gchar *user_field = "unknown", *encoded = NULL;
        gboolean has_user = FALSE;
        guint len = 0;
-       gsize enclen;
-
-       encoded = rspamd_url_encode (url, &enclen, task->task_pool);
+       gsize enclen = 0;
 
        if (!(task->flags & RSPAMD_TASK_FLAG_EXT_URLS)) {
-               obj = ucl_object_fromlstring (encoded, enclen);
+               if (url->hostlen > 0) {
+                       if (g_hash_table_lookup (cb->seen, url)) {
+                               return;
+                       }
+
+                       const gchar *end = NULL;
+
+                       if (g_utf8_validate (url->host, url->hostlen, &end)) {
+                               obj = ucl_object_fromlstring (url->host, url->hostlen);
+                       }
+                       else if (end - url->host > 0) {
+                               obj = ucl_object_fromlstring (url->host, end - url->host);
+                       }
+                       else {
+                               return;
+                       }
+               }
+               else {
+                       return;
+               }
+
+               g_hash_table_insert (cb->seen, url, url);
        }
        else {
+               encoded = rspamd_url_encode (url, &enclen, task->task_pool);
                obj = rspamd_protocol_extended_url (task, url, encoded, enclen);
        }
 
@@ -742,6 +766,10 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
                        len = task->from_envelope->addr_len;
                }
 
+               if (!encoded) {
+                       encoded = rspamd_url_encode (url, &enclen, task->task_pool);
+               }
+
                msg_notice_task_encrypted ("<%s> %s: %*s; ip: %s; URL: %*s",
                        task->message_id,
                        has_user ? "user" : "from",
@@ -760,9 +788,12 @@ rspamd_urls_tree_ucl (GHashTable *input, struct rspamd_task *task)
        obj = ucl_object_typed_new (UCL_ARRAY);
        cb.top = obj;
        cb.task = task;
+       cb.seen = g_hash_table_new (rspamd_url_host_hash, rspamd_urls_host_cmp);
 
        g_hash_table_foreach (input, urls_protocol_cb, &cb);
 
+       g_hash_table_unref (cb.seen);
+
        return obj;
 }
 
@@ -1168,18 +1199,16 @@ rspamd_protocol_write_ucl (struct rspamd_task *task,
        }
 
        if (flags & RSPAMD_PROTOCOL_URLS) {
-               if (task->flags & RSPAMD_TASK_FLAG_EXT_URLS) {
-                       if (g_hash_table_size (task->urls) > 0) {
-                               ucl_object_insert_key (top,
-                                               rspamd_urls_tree_ucl (task->urls, task),
-                                               "urls", 0, false);
-                       }
+               if (g_hash_table_size (task->urls) > 0) {
+                       ucl_object_insert_key (top,
+                                       rspamd_urls_tree_ucl (task->urls, task),
+                                       "urls", 0, false);
+               }
 
-                       if (g_hash_table_size (task->emails) > 0) {
-                               ucl_object_insert_key (top,
-                                               rspamd_emails_tree_ucl (task->emails, task),
-                                               "emails", 0, false);
-                       }
+               if (g_hash_table_size (task->emails) > 0) {
+                       ucl_object_insert_key (top,
+                                       rspamd_emails_tree_ucl (task->emails, task),
+                                       "emails", 0, false);
                }
        }
 
@@ -1279,9 +1308,7 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg,
                rspamd_http_message_add_header (msg, hn->begin, hv->begin);
        }
 
-       if (task->cfg->log_urls || (task->flags & RSPAMD_TASK_FLAG_EXT_URLS)) {
-               flags |= RSPAMD_PROTOCOL_URLS;
-       }
+       flags |= RSPAMD_PROTOCOL_URLS;
 
        top = rspamd_protocol_write_ucl (task, flags);
 
index 4599f3ce18708b912e8c0554d5c7b5975baf8a5f..4dcd11c9e6db25685ecd848e7434a85e5fb6daab 100644 (file)
@@ -2971,15 +2971,26 @@ guint
 rspamd_url_hash (gconstpointer u)
 {
        const struct rspamd_url *url = u;
-       rspamd_cryptobox_fast_hash_state_t st;
-
-       rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ());
 
        if (url->urllen > 0) {
-               rspamd_cryptobox_fast_hash_update (&st, url->string, url->urllen);
+               return rspamd_cryptobox_fast_hash (url->string, url->urllen,
+                               rspamd_hash_seed ());
        }
 
-       return rspamd_cryptobox_fast_hash_final (&st);
+       return 0;
+}
+
+guint
+rspamd_url_host_hash (gconstpointer u)
+{
+       const struct rspamd_url *url = u;
+
+       if (url->hostlen > 0) {
+               return rspamd_cryptobox_fast_hash (url->host, url->hostlen,
+                               rspamd_hash_seed ());
+       }
+
+       return 0;
 }
 
 guint
@@ -3045,6 +3056,22 @@ rspamd_urls_cmp (gconstpointer a, gconstpointer b)
        return r == 0;
 }
 
+gboolean
+rspamd_urls_host_cmp (gconstpointer a, gconstpointer b)
+{
+       const struct rspamd_url *u1 = a, *u2 = b;
+       int r = 0;
+
+       if (u1->hostlen != u2->hostlen) {
+               return FALSE;
+       }
+       else {
+               r = memcmp (u1->host, u2->host, u1->hostlen);
+       }
+
+       return r == 0;
+}
+
 gsize
 rspamd_url_decode (gchar *dst, const gchar *src, gsize size)
 {
@@ -3255,8 +3282,15 @@ rspamd_url_encode (struct rspamd_url *url, gsize *pdlen,
        dest = rspamd_mempool_alloc (pool, dlen + 1);
        d = dest;
        dend = d + dlen;
-       d += rspamd_snprintf ((gchar *)d, dend - d,
-                       "%*s://", url->protocollen, rspamd_url_protocols[url->protocol].name);
+
+       if (url->protocollen > 0 &&
+               (url->protocol >= 0 && url->protocol < G_N_ELEMENTS (rspamd_url_protocols))) {
+               d += rspamd_snprintf ((gchar *) d, dend - d,
+                               "%*s://", url->protocollen, rspamd_url_protocols[url->protocol].name);
+       }
+       else {
+               d += rspamd_snprintf ((gchar *) d, dend - d, "http://");
+       }
 
        if (url->userlen > 0) {
                ENCODE_URL_COMPONENT ((guchar *)url->user, url->userlen,
index fa5c69f00b198dea73571ac7ff90a6075b4b8224..523fb2c1fb30e53a375bbadaafdd05ec699ff14a 100644 (file)
@@ -203,12 +203,15 @@ void rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag,
 
 guint rspamd_url_hash (gconstpointer u);
 guint rspamd_email_hash (gconstpointer u);
+guint rspamd_url_host_hash (gconstpointer u);
+
 
 /* Compare two emails for building emails hash */
 gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b);
 
 /* Compare two urls for building urls hash */
 gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b);
+gboolean rspamd_urls_host_cmp (gconstpointer a, gconstpointer b);
 
 /**
  * Decode URL encoded string in-place and return new length of a string, src and dst are NULL terminated