source.dussan.org Git - rspamd.git/commitdiff
[Rework] Urls: more rework of the urls sets
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 7 Mar 2020 12:15:51 +0000 (12:15 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 9 Mar 2020 10:46:11 +0000 (10:46 +0000)
src/libmime/message.c
src/libmime/message.h
src/libserver/html.h
src/libserver/protocol.c
src/libserver/re_cache.c
src/libserver/url.c
src/libserver/url.h

index a43e109b5636ebaa0b4ce9f4efe80aed1795679a..40b7fe8bcd291b1020620cbe148981b6628dd77a 100644 (file)
@@ -1048,8 +1048,7 @@ rspamd_message_dtor (struct rspamd_message *msg)
        g_ptr_array_unref (msg->text_parts);
        g_ptr_array_unref (msg->parts);
 
-       g_hash_table_unref (msg->urls);
-       g_hash_table_unref (msg->emails);
+       kh_destroy (rspamd_url_hash, msg->urls);
 }
 
 struct rspamd_message*
@@ -1060,10 +1059,7 @@ rspamd_message_new (struct rspamd_task *task)
        msg = rspamd_mempool_alloc0 (task->task_pool, sizeof (*msg));
 
        msg->raw_headers = rspamd_message_headers_new ();
-
-       msg->emails = g_hash_table_new (rspamd_email_hash, rspamd_emails_cmp);
-       msg->urls = g_hash_table_new (rspamd_url_hash, rspamd_urls_cmp);
-
+       msg->urls = kh_init (rspamd_url_hash);
        msg->parts = g_ptr_array_sized_new (4);
        msg->text_parts = g_ptr_array_sized_new (2);
        msg->task = task;
index 91d6e13d46f490230cc84923cae61f6bd6140be7..96ed9d5d489522bb37546760b25810192fd7aab7 100644 (file)
@@ -13,6 +13,7 @@
 #include "libcryptobox/cryptobox.h"
 #include "libmime/mime_headers.h"
 #include "libmime/content_type.h"
+#include "libserver/url.h"
 #include "libutil/ref.h"
 #include "libutil/str_util.h"
 
@@ -175,8 +176,7 @@ struct rspamd_message {
        GPtrArray *text_parts;                  /**< list of text parts                                                         */
        struct rspamd_message_raw_headers_content raw_headers_content;
        struct rspamd_received_header *received;        /**< list of received headers                                           */
-       GHashTable *urls;                                                       /**< list of parsed urls                                                        */
-       GHashTable *emails;                                                     /**< list of parsed emails                                                      */
+       khash_t (rspamd_url_hash) *urls;
        struct rspamd_mime_headers_table *raw_headers;  /**< list of raw headers                                                */
        struct rspamd_mime_header *headers_order;       /**< order of raw headers                                                       */
        struct rspamd_task *task;
index b369bd8907932de13e7cbb8ad5087117c42e1794..ee5c242cb18ab703a8fbe877b3a686a26afc0994 100644 (file)
@@ -141,7 +141,8 @@ GByteArray *rspamd_html_process_part (rspamd_mempool_t *pool,
 
 GByteArray *rspamd_html_process_part_full (rspamd_mempool_t *pool,
                                                                                   struct html_content *hc,
-                                                                                  GByteArray *in, GList **exceptions, GHashTable *urls, GHashTable *emails);
+                                                                                  GByteArray *in, GList **exceptions,
+                                                                                  GHashTable *urls, GHashTable *emails);
 
 /*
  * Returns true if a specified tag has been seen in a part
index 739d3b950448ade345a8a6e1a2c3033d798bc5fa..35d50b909ef300d17b71c9e8092fad4569128643 100644 (file)
@@ -861,7 +861,7 @@ rspamd_protocol_handle_request (struct rspamd_task *task,
 /* Structure for writing tree data */
 struct tree_cb_data {
        ucl_object_t *top;
-       GHashTable *seen;
+       khash_t (rspamd_url_host_hash) *seen;
        struct rspamd_task *task;
 };
 
@@ -908,10 +908,8 @@ rspamd_protocol_extended_url (struct rspamd_task *task,
  * Callback for writing urls
  */
 static void
-urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
+urls_protocol_cb (struct rspamd_url *url, struct tree_cb_data *cb)
 {
-       struct tree_cb_data *cb = ud;
-       struct rspamd_url *url = value;
        ucl_object_t *obj;
        struct rspamd_task *task = cb->task;
        const gchar *user_field = "unknown", *encoded = NULL;
@@ -921,7 +919,7 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
 
        if (!(task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS)) {
                if (url->hostlen > 0) {
-                       if (g_hash_table_lookup (cb->seen, url)) {
+                       if (rspamd_url_host_set_has (cb->seen, url)) {
                                return;
                        }
 
@@ -941,7 +939,7 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
                        return;
                }
 
-               g_hash_table_insert (cb->seen, url, url);
+               rspamd_url_host_set_add (cb->seen, url);
        }
        else {
                encoded = rspamd_url_encode (url, &enclen, task->task_pool);
@@ -975,28 +973,32 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
 }
 
 static ucl_object_t *
-rspamd_urls_tree_ucl (GHashTable *input, struct rspamd_task *task)
+rspamd_urls_tree_ucl (khash_t (rspamd_url_hash) *set,
+               struct rspamd_task *task)
 {
        struct tree_cb_data cb;
        ucl_object_t *obj;
+       struct rspamd_url *u;
 
        obj = ucl_object_typed_new (UCL_ARRAY);
        cb.top = obj;
        cb.task = task;
-       cb.seen = g_hash_table_new (rspamd_url_host_hash, rspamd_urls_host_cmp);
+       cb.seen = kh_init (rspamd_url_host_hash);
 
-       g_hash_table_foreach (input, urls_protocol_cb, &cb);
+       kh_foreach_key (set, u, {
+               if (!(u->protocol & PROTOCOL_MAILTO)) {
+                       urls_protocol_cb (u, &cb);
+               }
+       });
 
-       g_hash_table_unref (cb.seen);
+       kh_destroy (rspamd_url_host_hash, cb.seen);
 
        return obj;
 }
 
 static void
-emails_protocol_cb (gpointer key, gpointer value, gpointer ud)
+emails_protocol_cb (struct rspamd_url *url, struct tree_cb_data *cb)
 {
-       struct tree_cb_data *cb = ud;
-       struct rspamd_url *url = value;
        ucl_object_t *obj;
 
        if (url->userlen > 0 && url->hostlen > 0) {
@@ -1007,16 +1009,23 @@ emails_protocol_cb (gpointer key, gpointer value, gpointer ud)
 }
 
 static ucl_object_t *
-rspamd_emails_tree_ucl (GHashTable *input, struct rspamd_task *task)
+rspamd_emails_tree_ucl (khash_t (rspamd_url_hash) *set,
+                                               struct rspamd_task *task)
 {
        struct tree_cb_data cb;
        ucl_object_t *obj;
+       struct rspamd_url *u;
 
        obj = ucl_object_typed_new (UCL_ARRAY);
        cb.top = obj;
        cb.task = task;
 
-       g_hash_table_foreach (input, emails_protocol_cb, &cb);
+       kh_foreach_key (set, u, {
+               if ((u->protocol & PROTOCOL_MAILTO)) {
+                       emails_protocol_cb (u, &cb);
+               }
+       });
+
 
        return obj;
 }
@@ -1446,15 +1455,12 @@ rspamd_protocol_write_ucl (struct rspamd_task *task,
        }
 
        if (flags & RSPAMD_PROTOCOL_URLS && task->message) {
-               if (g_hash_table_size (MESSAGE_FIELD (task, urls)) > 0) {
+               if (kh_size (MESSAGE_FIELD (task, urls)) > 0) {
                        ucl_object_insert_key (top,
                                        rspamd_urls_tree_ucl (MESSAGE_FIELD (task, urls), task),
                                        "urls", 0, false);
-               }
-
-               if (g_hash_table_size (MESSAGE_FIELD (task, emails)) > 0) {
                        ucl_object_insert_key (top,
-                                       rspamd_emails_tree_ucl (MESSAGE_FIELD (task, emails), task),
+                                       rspamd_emails_tree_ucl (MESSAGE_FIELD (task, urls), task),
                                        "emails", 0, false);
                }
        }
index 995af8ddf169becf9fbaa228c98dab1f4c73f593..257428720d1b3d2e9e82a4f26f266cba42310b44 100644 (file)
@@ -1053,7 +1053,6 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                gboolean is_strong)
 {
        guint ret = 0, i, re_id;
-       GHashTableIter it;
        struct rspamd_mime_header *rh;
        const gchar *in;
        const guchar **scvec;
@@ -1062,7 +1061,6 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
        struct rspamd_mime_text_part *text_part;
        struct rspamd_mime_part *mime_part;
        struct rspamd_url *url;
-       gpointer k, v;
        guint len, cnt;
        const gchar *class_name;
 
@@ -1164,17 +1162,18 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                }
                break;
        case RSPAMD_RE_URL:
-               cnt = g_hash_table_size (MESSAGE_FIELD (task, urls));
+               cnt = kh_size (MESSAGE_FIELD (task, urls));
 
                if (cnt > 0) {
                        scvec = g_malloc (sizeof (*scvec) * cnt);
                        lenvec = g_malloc (sizeof (*lenvec) * cnt);
-                       g_hash_table_iter_init (&it, MESSAGE_FIELD (task, urls));
                        i = 0;
                        raw = FALSE;
 
-                       while (g_hash_table_iter_next (&it, &k, &v)) {
-                               url = v;
+                       kh_foreach_key (MESSAGE_FIELD (task, urls), url, {
+                               if ((url->protocol & PROTOCOL_MAILTO)) {
+                                       continue;
+                               }
                                in = url->string;
                                len = url->urllen;
 
@@ -1182,7 +1181,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                                        scvec[i] = (guchar *) in;
                                        lenvec[i++] = len;
                                }
-                       }
+                       });
 
 #if 0
                        g_hash_table_iter_init (&it, MESSAGE_FIELD (task, emails));
@@ -1207,18 +1206,19 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                }
                break;
        case RSPAMD_RE_EMAIL:
-               cnt = g_hash_table_size (MESSAGE_FIELD (task, emails));
+               cnt = kh_size (MESSAGE_FIELD (task, urls));
 
                if (cnt > 0) {
                        scvec = g_malloc (sizeof (*scvec) * cnt);
                        lenvec = g_malloc (sizeof (*lenvec) * cnt);
-                       g_hash_table_iter_init (&it, MESSAGE_FIELD (task, emails));
                        i = 0;
                        raw = FALSE;
 
-                       while (g_hash_table_iter_next (&it, &k, &v)) {
-                               url = v;
+                       kh_foreach_key (MESSAGE_FIELD (task, urls), url, {
 
+                               if (!(url->protocol & PROTOCOL_MAILTO)) {
+                                       continue;
+                               }
                                if (url->userlen == 0 || url->hostlen == 0) {
                                        continue;
                                }
@@ -1227,7 +1227,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                                len = url->userlen + 1 + url->hostlen;
                                scvec[i] = (guchar *) in;
                                lenvec[i++] = len;
-                       }
+                       });
 
                        ret = rspamd_re_cache_process_regexp_data (rt, re,
                                        task, scvec, lenvec, i, raw, &processed_hyperscan);
index 3449310b282eb5c201b76e6be91decd441fd0ecf..505d1d15057cd78a2a98fba40786910ad8e716f4 100644 (file)
@@ -214,6 +214,13 @@ struct url_matcher static_matchers[] = {
                                URL_FLAG_NOHTML}
 };
 
+
+static inline khint_t rspamd_url_hash (struct rspamd_url *u);
+
+static inline khint_t rspamd_url_host_hash (struct rspamd_url * u);
+static inline bool rspamd_urls_cmp (struct rspamd_url *a, struct rspamd_url *b);
+static inline bool rspamd_urls_host_cmp (struct rspamd_url *a, struct rspamd_url *b);
+
 /* Hash table implementation */
 __KHASH_IMPL (rspamd_url_hash, kh_inline,struct rspamd_url *, char, false,
                rspamd_url_hash, rspamd_urls_cmp);
@@ -3116,7 +3123,6 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
        struct rspamd_task *task;
        gchar *url_str = NULL;
        struct rspamd_url *query_url, *existing;
-       GHashTable *target_tbl = NULL;
        gint rc;
        gboolean prefix_added;
 
@@ -3141,36 +3147,23 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
        }
 
        if (url->protocol == PROTOCOL_MAILTO) {
-               if (url->userlen > 0) {
-                       target_tbl = MESSAGE_FIELD (task, emails);
+               if (url->userlen == 0) {
+                       return FALSE;
                }
        }
-       else {
-               target_tbl = MESSAGE_FIELD (task, urls);
-       }
-
-       if (target_tbl) {
-               /* Also check max urls */
-               if (cbd->task->cfg && cbd->task->cfg->max_urls > 0) {
-                       if (g_hash_table_size (target_tbl) > cbd->task->cfg->max_urls) {
-                               msg_err_task ("part has too many URLs, we cannot process more: "
-                                                         "%d urls extracted ",
-                                               (guint)g_hash_table_size (target_tbl));
-
-                               return FALSE;
-                       }
-               }
+       /* Also check max urls */
+       if (cbd->task->cfg && cbd->task->cfg->max_urls > 0) {
+               if (kh_size (MESSAGE_FIELD (task, urls)) > cbd->task->cfg->max_urls) {
+                       msg_err_task ("part has too many URLs, we cannot process more: "
+                                                 "%d urls extracted ",
+                                       (guint)kh_size (MESSAGE_FIELD (task, urls)));
 
-               if ((existing = g_hash_table_lookup (target_tbl, url)) == NULL) {
-                       url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
-                       g_hash_table_insert (target_tbl, url, url);
-               }
-               else {
-                       existing->count++;
+                       return FALSE;
                }
        }
 
-       target_tbl = NULL;
+       url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
+       rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
 
        cbd->part->exceptions = g_list_prepend (
                        cbd->part->exceptions,
@@ -3178,7 +3171,8 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
 
        /* We also search the query for additional url inside */
        if (url->querylen > 0) {
-               if (rspamd_url_find (task->task_pool, rspamd_url_query_unsafe (url), url->querylen,
+               if (rspamd_url_find (task->task_pool,
+                               rspamd_url_query_unsafe (url), url->querylen,
                                &url_str, RSPAMD_URL_FIND_ALL, NULL, &prefix_added)) {
                        query_url = rspamd_mempool_alloc0 (task->task_pool,
                                        sizeof (struct rspamd_url));
@@ -3198,23 +3192,13 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
                                }
 
                                if (query_url->protocol == PROTOCOL_MAILTO) {
-                                       if (query_url->userlen > 0) {
-                                               target_tbl = MESSAGE_FIELD (task, emails);
+                                       if (query_url->userlen == 0) {
+                                               return TRUE;
                                        }
                                }
-                               else {
-                                       target_tbl = MESSAGE_FIELD (task, urls);
-                               }
 
-                               if (target_tbl) {
-                                       if ((existing = g_hash_table_lookup (target_tbl, query_url)) == NULL) {
-                                               url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
-                                               g_hash_table_insert (target_tbl, query_url, query_url);
-                                       }
-                                       else {
-                                               existing->count++;
-                                       }
-                               }
+                               query_url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
+                               rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), query_url);
                        }
                }
        }
@@ -3321,27 +3305,13 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
        url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED|RSPAMD_URL_FLAG_SUBJECT;
 
        if (url->protocol == PROTOCOL_MAILTO) {
-               if (url->userlen > 0 && url->hostlen > 0) {
-                       if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, emails),
-                                       url)) == NULL) {
-                               g_hash_table_insert (MESSAGE_FIELD (task, emails), url,
-                                               url);
-                       }
-                       else {
-                               existing->count ++;
-                       }
-               }
-       }
-       else {
-               if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, urls),
-                               url)) == NULL) {
-                       g_hash_table_insert (MESSAGE_FIELD (task, urls), url, url);
-               }
-               else {
-                       existing->count ++;
+               if (url->userlen == 0) {
+                       return FALSE;
                }
        }
 
+       rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
+
        /* We also search the query for additional url inside */
        if (url->querylen > 0) {
                if (rspamd_url_find (task->task_pool, rspamd_url_query_unsafe (url), url->querylen,
@@ -3364,15 +3334,14 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
                                        query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
                                }
 
-                               if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, urls),
-                                               query_url)) == NULL) {
-                                       g_hash_table_insert (MESSAGE_FIELD (task, urls),
-                                                       query_url,
-                                                       query_url);
-                               }
-                               else {
-                                       existing->count ++;
+                               if (query_url->protocol == PROTOCOL_MAILTO) {
+                                       if (query_url->userlen == 0) {
+                                               return TRUE;
+                                       }
                                }
+
+                               rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls),
+                                               query_url);
                        }
                }
        }
@@ -3380,26 +3349,22 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
        return TRUE;
 }
 
-inline guint
-rspamd_url_hash (gconstpointer u)
+static inline khint_t
+rspamd_url_hash (struct rspamd_url *url)
 {
-       const struct rspamd_url *url = u;
-
        if (url->urllen > 0) {
-               return (guint)rspamd_cryptobox_fast_hash (url->string, url->urllen,
+               return (khint_t)rspamd_cryptobox_fast_hash (url->string, url->urllen,
                                rspamd_hash_seed ());
        }
 
        return 0;
 }
 
-inline guint
-rspamd_url_host_hash (gconstpointer u)
+static inline khint_t
+rspamd_url_host_hash (struct rspamd_url *url)
 {
-       const struct rspamd_url *url = u;
-
        if (url->hostlen > 0) {
-               return (guint)rspamd_cryptobox_fast_hash (rspamd_url_host_unsafe (url),
+               return (khint_t)rspamd_cryptobox_fast_hash (rspamd_url_host_unsafe (url),
                                url->hostlen,
                                rspamd_hash_seed ());
        }
@@ -3407,30 +3372,10 @@ rspamd_url_host_hash (gconstpointer u)
        return 0;
 }
 
-inline guint
-rspamd_email_hash (gconstpointer u)
-{
-       const struct rspamd_url *url = u;
-       rspamd_cryptobox_fast_hash_state_t st;
-
-       rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ());
-
-       if (url->hostlen > 0) {
-               rspamd_cryptobox_fast_hash_update (&st, rspamd_url_host_unsafe (url), url->hostlen);
-       }
-
-       if (url->userlen > 0) {
-               rspamd_cryptobox_fast_hash_update (&st, rspamd_url_user_unsafe(url), url->userlen);
-       }
-
-       return (guint)rspamd_cryptobox_fast_hash_final (&st);
-}
-
 /* Compare two emails for building emails tree */
-inline gboolean
-rspamd_emails_cmp (gconstpointer a, gconstpointer b)
+static inline bool
+rspamd_emails_cmp (struct rspamd_url *u1, struct rspamd_url *u2)
 {
-       const struct rspamd_url *u1 = a, *u2 = b;
        gint r;
 
        if (u1->hostlen != u2->hostlen || u1->hostlen == 0) {
@@ -3456,30 +3401,32 @@ rspamd_emails_cmp (gconstpointer a, gconstpointer b)
        return FALSE;
 }
 
-inline gboolean
-rspamd_urls_cmp (gconstpointer a, gconstpointer b)
+static inline bool
+rspamd_urls_cmp (struct rspamd_url *u1, struct rspamd_url *u2)
 {
-       const struct rspamd_url *u1 = a, *u2 = b;
        int r = 0;
 
-       if (u1->urllen != u2->urllen) {
-               return FALSE;
+       if (u1->protocol != u2->protocol || u1->urllen != u2->urllen) {
+               return false;
        }
        else {
+               if (u1->protocol & PROTOCOL_MAILTO) {
+                       return rspamd_emails_cmp (u1, u2);
+               }
+
                r = memcmp (u1->string, u2->string, u1->urllen);
        }
 
        return r == 0;
 }
 
-inline gboolean
-rspamd_urls_host_cmp (gconstpointer a, gconstpointer b)
+static inline bool
+rspamd_urls_host_cmp (struct rspamd_url *u1, struct rspamd_url *u2)
 {
-       const struct rspamd_url *u1 = a, *u2 = b;
        int r = 0;
 
        if (u1->hostlen != u2->hostlen) {
-               return FALSE;
+               return false;
        }
        else {
                r = memcmp (rspamd_url_host_unsafe (u1), rspamd_url_host_unsafe (u2),
@@ -3834,6 +3781,22 @@ rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
        return true;
 }
 
+/*
+ * Inserts `u` into the host set.
+ * kh_put reports the outcome through `r`: 0 means an equal key is already
+ * stored, a negative value means the insertion failed, a positive value
+ * means a new slot was taken.
+ * Returns true only when `u` was actually added.
+ */
+bool
+rspamd_url_host_set_add (khash_t (rspamd_url_host_hash) *set,
+                                                               struct rspamd_url *u)
+{
+       gint r;
+
+       /* The slot iterator is not needed here, only the status in r */
+       kh_put (rspamd_url_host_hash, set, u, &r);
+
+       return r > 0;
+}
+
 bool
 rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u)
 {
@@ -3845,5 +3808,19 @@ rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u)
                return false;
        }
 
+       return true;
+}
+
+/*
+ * Checks whether the host set already holds an entry equal to `u`.
+ * The lookup must go through the rspamd_url_host_hash instantiation: `set`
+ * is a khash_t (rspamd_url_host_hash), not a full-url set.
+ */
+bool
+rspamd_url_host_set_has (khash_t (rspamd_url_host_hash) *set, struct rspamd_url *u)
+{
+       khiter_t k;
+
+       k = kh_get (rspamd_url_host_hash, set, u);
+
+       /* kh_get returns kh_end (set) when the key is absent */
+       return k != kh_end (set);
+}
\ No newline at end of file
index 358c61e16fe29b8e4df6fc5ab0013a7341fe40f1..aff7ccf5fc26dde00f78a812ae5660bb2adae71e 100644 (file)
@@ -225,21 +225,6 @@ gboolean rspamd_url_task_subject_callback (struct rspamd_url *url,
                                                                           gsize start_offset,
                                                                           gsize end_offset, gpointer ud);
 
-guint rspamd_url_hash (gconstpointer u);
-
-guint rspamd_email_hash (gconstpointer u);
-
-guint rspamd_url_host_hash (gconstpointer u);
-
-
-/* Compare two emails for building emails hash */
-gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b);
-
-/* Compare two urls for building emails hash */
-gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b);
-
-gboolean rspamd_urls_host_cmp (gconstpointer a, gconstpointer b);
-
 /**
  * Decode URL encoded string in-place and return new length of a string, src and dst are NULL terminated
  * @param dst
@@ -295,6 +280,14 @@ KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char);
  */
 bool rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
                struct rspamd_url *u);
+/**
+ * Adds a url to a host set
+ * @param set host set to insert into
+ * @param u url to insert
+ * @return true if the url was newly inserted, false if an equal key was already present
+ */
+bool rspamd_url_host_set_add (khash_t (rspamd_url_host_hash) *set,
+                                                                        struct rspamd_url *u);
 /**
  * Checks if a url is in set
  * @param set
@@ -302,6 +295,7 @@ bool rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
  * @return
  */
 bool rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u);
+bool rspamd_url_host_set_has (khash_t (rspamd_url_host_hash) *set, struct rspamd_url *u);
 
 #ifdef  __cplusplus
 }