diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-03-06 15:58:22 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-03-09 10:46:11 +0000 |
commit | c399a6013b8522fc28ed11839fae6cbe7062278a (patch) | |
tree | bea3bbc3092e3ddfe4fb7c13227134d72a8910b8 | |
parent | a8f11faf7f584916078d6fadb36e0c2f1984e2b0 (diff) | |
download | rspamd-c399a6013b8522fc28ed11839fae6cbe7062278a.tar.gz rspamd-c399a6013b8522fc28ed11839fae6cbe7062278a.zip |
[Rework] Urls: rework urls hash structure
-rw-r--r-- | src/libserver/url.c | 53 | ||||
-rw-r--r-- | src/libserver/url.h | 22 |
2 files changed, 69 insertions, 6 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 043f523f0..3449310b2 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -214,6 +214,12 @@ struct url_matcher static_matchers[] = {
 				URL_FLAG_NOHTML}
 };
 
+/* Hash table implementation */
+__KHASH_IMPL (rspamd_url_hash, kh_inline,struct rspamd_url *, char, false,
+		rspamd_url_hash, rspamd_urls_cmp);
+__KHASH_IMPL (rspamd_url_host_hash, kh_inline,struct rspamd_url *, char, false,
+		rspamd_url_host_hash, rspamd_urls_host_cmp);
+
 struct url_callback_data {
 	const gchar *begin;
 	gchar *url_str;
@@ -3374,7 +3380,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
 	return TRUE;
 }
 
-guint
+inline guint
 rspamd_url_hash (gconstpointer u)
 {
 	const struct rspamd_url *url = u;
@@ -3387,7 +3393,7 @@ rspamd_url_hash (gconstpointer u)
 	return 0;
 }
 
-guint
+inline guint
 rspamd_url_host_hash (gconstpointer u)
 {
 	const struct rspamd_url *url = u;
@@ -3401,7 +3407,7 @@ rspamd_url_host_hash (gconstpointer u)
 	return 0;
 }
 
-guint
+inline guint
 rspamd_email_hash (gconstpointer u)
 {
 	const struct rspamd_url *url = u;
@@ -3421,7 +3427,7 @@ rspamd_email_hash (gconstpointer u)
 }
 
 /* Compare two emails for building emails tree */
-gboolean
+inline gboolean
 rspamd_emails_cmp (gconstpointer a, gconstpointer b)
 {
 	const struct rspamd_url *u1 = a, *u2 = b;
@@ -3450,7 +3456,7 @@ rspamd_emails_cmp (gconstpointer a, gconstpointer b)
 	return FALSE;
 }
 
-gboolean
+inline gboolean
 rspamd_urls_cmp (gconstpointer a, gconstpointer b)
 {
 	const struct rspamd_url *u1 = a, *u2 = b;
@@ -3466,7 +3472,7 @@ rspamd_urls_cmp (gconstpointer a, gconstpointer b)
 	return r == 0;
 }
 
-gboolean
+inline gboolean
 rspamd_urls_host_cmp (gconstpointer a, gconstpointer b)
 {
 	const struct rspamd_url *u1 = a, *u2 = b;
@@ -3806,3 +3812,38 @@ rspamd_url_protocol_from_string (const gchar *str)
 
 	return ret;
 }
+
+
+bool
+rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
+		struct rspamd_url *u)
+{
+	khiter_t k;
+	gint r;
+
+	k = kh_put (rspamd_url_hash, set, u, &r);
+
+	if (r == 0) {
+		struct rspamd_url *ex = kh_key (set, k);
+
+		ex->count ++;
+
+		return false;
+	}
+
+	return true;
+}
+
+bool
+rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u)
+{
+	khiter_t k;
+
+	k = kh_get (rspamd_url_hash, set, u);
+
+	if (k == kh_end (set)) {
+		return false;
+	}
+
+	return true;
+}
\ No newline at end of file
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 00f09ac30..358c61e16 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -4,6 +4,7 @@
 
 #include "config.h"
 #include "mem_pool.h"
+#include "khash.h"
 #include "fstring.h"
 
 #ifdef __cplusplus
@@ -281,6 +282,27 @@ const gchar *rspamd_url_protocol_name (enum rspamd_url_protocol proto);
  */
 enum rspamd_url_protocol rspamd_url_protocol_from_string (const gchar *str);
 
+/* Defines sets of urls indexed by url as is */
+KHASH_DECLARE (rspamd_url_hash, struct rspamd_url *, char);
+KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char);
+
+/* Convenience functions for url sets */
+/**
+ * Add an url to set or increase the existing url count
+ * @param set
+ * @param u
+ * @return true if a new url has been added
+ */
+bool rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
+		struct rspamd_url *u);
+/**
+ * Checks if a url is in set
+ * @param set
+ * @param u
+ * @return
+ */
+bool rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u);
+
 #ifdef __cplusplus
 }
 #endif