From 956a05fd4f371da80c85cdedfc5923e0350c901d Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sun, 24 Aug 2014 17:03:24 +0100 Subject: [PATCH] Rework rspamd hash. --- src/libutil/hash.c | 431 ++++++--------------------------------------- src/libutil/hash.h | 128 +------------- 2 files changed, 62 insertions(+), 497 deletions(-) diff --git a/src/libutil/hash.c b/src/libutil/hash.c index 0e8b1646e..7e4e7f27e 100644 --- a/src/libutil/hash.c +++ b/src/libutil/hash.c @@ -24,318 +24,49 @@ #include "config.h" #include "hash.h" +#include "uthash_strcase.h" +#include "utlist.h" -#define HASH_TABLE_MIN_SIZE 19 -#define HASH_TABLE_MAX_SIZE 13845163 - -/* - * Performs a lookup in the hash table. Virtually all hash operations - * will use this function internally. +/** + * LRU hashing */ -static inline struct rspamd_hash_node ** -rspamd_hash_lookup_node (rspamd_hash_t * hash, - gconstpointer key, - guint * hash_return) -{ - struct rspamd_hash_node **node_ptr, *node; - guint hash_value; - hash_value = (*hash->hash_func)(key); - if (hash->shared) { - rspamd_mempool_rlock_rwlock (hash->lock); - } - node_ptr = &hash->nodes[hash_value % hash->size]; - - if (hash_return) - *hash_return = hash_value; - - /* Hash table lookup needs to be fast. - * We therefore remove the extra conditional of testing - * whether to call the key_equal_func or not from - * the inner loop. - * - * Additional optimisation: first check if our full hash - * values are equal so we can avoid calling the full-blown - * key equality function in most cases. - */ - if (hash->key_equal_func) { - while ((node = *node_ptr)) { - if (node->key_hash == hash_value && - hash->key_equal_func (node->key, key)) { - break; - } - node_ptr = &(*node_ptr)->next; - } - } - else { - while ((node = *node_ptr)) { - if (node->key == key) { - break; - } - node_ptr = &(*node_ptr)->next; - } - } - if (hash->shared) { - rspamd_mempool_runlock_rwlock (hash->lock); - } - return node_ptr; -} +typedef struct rspamd_lru_element_s { + gpointer data; + gpointer key; + time_t store_time; + guint ttl; + rspamd_lru_hash_t *hash; -/* - * Removes a node from the hash table and updates the node count. - * No table resize is performed. - */ -static void -rspamd_hash_remove_node (rspamd_hash_t * hash, - struct rspamd_hash_node ***node_ptr_ptr) -{ - struct rspamd_hash_node **node_ptr, *node; + UT_hash_handle hh; +} rspamd_lru_element_t; - if (hash->shared) { - rspamd_mempool_wlock_rwlock (hash->lock); - } - node_ptr = *node_ptr_ptr; - node = *node_ptr; +struct rspamd_lru_hash_s { + gint maxsize; + gint maxage; + GDestroyNotify value_destroy; + GDestroyNotify key_destroy; - *node_ptr = node->next; + rspamd_lru_element_t *elements; +}; - hash->nnodes--; - if (hash->shared) { - rspamd_mempool_wunlock_rwlock (hash->lock); - } -} -/* - * Resizes the hash table to the optimal size based on the number of - * nodes currently held. - */ static void -rspamd_hash_resize (rspamd_hash_t * hash) -{ - struct rspamd_hash_node **new_nodes; - struct rspamd_hash_node *node, *next; - guint hash_val; - gint new_size, i; - - new_size = g_spaced_primes_closest (hash->nnodes); - new_size = CLAMP (new_size, HASH_TABLE_MIN_SIZE, HASH_TABLE_MAX_SIZE); - - if (hash->shared) { - new_nodes = - rspamd_mempool_alloc_shared (hash->pool, - sizeof (struct rspamd_hash_node *) * new_size); - } - else { - new_nodes = - rspamd_mempool_alloc (hash->pool, - sizeof (struct rspamd_hash_node *) * new_size); - } - - if (hash->shared) { - rspamd_mempool_wlock_rwlock (hash->lock); - } - - for (i = 0; i < hash->size; i++) { - for (node = hash->nodes[i]; node; node = next) { - next = node->next; - hash_val = node->key_hash % new_size; - node->next = new_nodes[hash_val]; - new_nodes[hash_val] = node; - } - } - - hash->nodes = new_nodes; - hash->size = new_size; - - if (hash->shared) { - rspamd_mempool_wunlock_rwlock (hash->lock); - } -} - -/* - * Resizes the hash table, if needed. - */ -static inline void -rspamd_hash_maybe_resize (rspamd_hash_t * hash) -{ - gint nnodes = hash->nnodes; - gint size = hash->size; - - if ((size >= 3 * nnodes && - size > HASH_TABLE_MIN_SIZE) || - (3 * size <= nnodes && size < HASH_TABLE_MAX_SIZE)) { - rspamd_hash_resize (hash); - } -} - -/* Create new hash in specified pool */ -rspamd_hash_t * -rspamd_hash_new (rspamd_mempool_t * pool, - GHashFunc hash_func, - GEqualFunc key_equal_func) -{ - rspamd_hash_t *hash; - - hash = rspamd_mempool_alloc (pool, sizeof (rspamd_hash_t)); - hash->size = HASH_TABLE_MIN_SIZE; - hash->nnodes = 0; - hash->hash_func = hash_func ? hash_func : g_direct_hash; - hash->key_equal_func = key_equal_func; - hash->nodes = rspamd_mempool_alloc0 (pool, - sizeof (struct rspamd_hash_node *) * hash->size); - hash->shared = 0; - hash->pool = pool; - - return hash; -} - -/* - * Create new hash in specified pool using shared memory - */ -rspamd_hash_t * -rspamd_hash_new_shared (rspamd_mempool_t * pool, - GHashFunc hash_func, - GEqualFunc key_equal_func, - gint size) -{ - rspamd_hash_t *hash; - - hash = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_hash_t)); - hash->size = size; - hash->nnodes = 0; - hash->hash_func = hash_func ? hash_func : g_direct_hash; - hash->key_equal_func = key_equal_func; - hash->nodes = - rspamd_mempool_alloc0_shared (pool, - sizeof (struct rspamd_hash_node *) * hash->size); - hash->shared = 1; - /* Get mutex from pool for locking on insert/remove operations */ - hash->lock = rspamd_mempool_get_rwlock (pool); - hash->pool = pool; - - return hash; -} - -/* - * Insert item in hash - */ -void -rspamd_hash_insert (rspamd_hash_t * hash, gpointer key, gpointer value) -{ - struct rspamd_hash_node **node_ptr, *node; - guint key_hash; - - g_return_if_fail (hash != NULL); - node_ptr = rspamd_hash_lookup_node (hash, key, &key_hash); - - if (hash->shared) { - rspamd_mempool_wlock_rwlock (hash->lock); - } - if ((node = *node_ptr)) { - node->key = key; - node->value = value; - } - else { - if (hash->shared) { - node = - rspamd_mempool_alloc_shared (hash->pool, - sizeof (struct rspamd_hash_node)); - } - else { - node = - rspamd_mempool_alloc (hash->pool, - sizeof (struct rspamd_hash_node)); - } - - node->key = key; - node->value = value; - node->key_hash = key_hash; - node->next = NULL; - - *node_ptr = node; - hash->nnodes++; - } - if (hash->shared) { - rspamd_mempool_wunlock_rwlock (hash->lock); - } - - if (!hash->shared) { - rspamd_hash_maybe_resize (hash); - } -} - -/* - * Remove item from hash - */ -gboolean -rspamd_hash_remove (rspamd_hash_t * hash, gpointer key) -{ - struct rspamd_hash_node **node_ptr; - - g_return_val_if_fail (hash != NULL, FALSE); - - node_ptr = rspamd_hash_lookup_node (hash, key, NULL); - if (*node_ptr == NULL) - return FALSE; - - rspamd_hash_remove_node (hash, &node_ptr); - rspamd_hash_maybe_resize (hash); - - return TRUE; -} - -/* - * Lookup item from hash - */ -gpointer -rspamd_hash_lookup (rspamd_hash_t * hash, gpointer key) -{ - struct rspamd_hash_node *node; - g_return_val_if_fail (hash != NULL, NULL); - - node = *rspamd_hash_lookup_node (hash, key, NULL); - - return node ? node->value : NULL; -} - -/* - * Iterate throught hash - */ -void -rspamd_hash_foreach (rspamd_hash_t * hash, GHFunc func, gpointer user_data) +rspamd_lru_hash_destroy_node (gpointer v) { - struct rspamd_hash_node *node; - gint i; + rspamd_lru_element_t *node = v; + rspamd_lru_hash_t *hash; - g_return_if_fail (hash != NULL); - g_return_if_fail (func != NULL); + hash = node->hash; - if (hash->shared) { - rspamd_mempool_rlock_rwlock (hash->lock); + HASH_DELETE(hh, hash->elements, node); + if (hash->value_destroy) { + hash->value_destroy (node->data); } - for (i = 0; i < hash->size; i++) { - for (node = hash->nodes[i]; node; node = node->next) { - (*func)(node->key, node->value, user_data); - } - } - if (hash->shared) { - rspamd_mempool_runlock_rwlock (hash->lock); + if (hash->key_destroy) { + hash->key_destroy (node->key); } -} -/** - * LRU hashing - */ - -static void -rspamd_lru_hash_destroy_node (gpointer v) -{ - rspamd_lru_element_t *node = v; - - if (node->hash->value_destroy) { - node->hash->value_destroy (node->data); - } - g_queue_delete_link (node->hash->q, node->link); g_slice_free1 (sizeof (rspamd_lru_element_t), node); } @@ -367,8 +98,7 @@ rspamd_lru_create_node (rspamd_lru_hash_t *hash, * @return new rspamd_hash object */ rspamd_lru_hash_t * -rspamd_lru_hash_new (GHashFunc hash_func, - GEqualFunc key_equal_func, +rspamd_lru_hash_new ( gint maxsize, gint maxage, GDestroyNotify key_destroy, @@ -376,56 +106,12 @@ rspamd_lru_hash_new (GHashFunc hash_func, { rspamd_lru_hash_t *new; - new = g_malloc (sizeof (rspamd_lru_hash_t)); - new->storage = g_hash_table_new_full (hash_func, - key_equal_func, - key_destroy, - rspamd_lru_hash_destroy_node); - new->maxage = maxage; - new->maxsize = maxsize; - new->value_destroy = value_destroy; - new->key_destroy = NULL; - new->q = g_queue_new (); - new->insert_func = (lru_cache_insert_func)g_hash_table_replace; - new->lookup_func = (lru_cache_lookup_func)g_hash_table_lookup; - new->delete_func = (lru_cache_delete_func)g_hash_table_remove; - new->destroy_func = (lru_cache_destroy_func)g_hash_table_destroy; - - return new; -} -/** - * Create new lru hash with custom storage - * @param maxsize maximum elements in a hash - * @param maxage maximum age of elemnt - * @param hash_func pointer to hash function - * @param key_equal_func pointer to function for comparing keys - * @return new rspamd_hash object - */ -rspamd_lru_hash_t * -rspamd_lru_hash_new_full (GHashFunc hash_func, - GEqualFunc key_equal_func, - gint maxsize, - gint maxage, - GDestroyNotify key_destroy, - GDestroyNotify value_destroy, - gpointer storage, - lru_cache_insert_func insert_func, - lru_cache_lookup_func lookup_func, - lru_cache_delete_func delete_func) -{ - rspamd_lru_hash_t *new; - - new = g_malloc (sizeof (rspamd_lru_hash_t)); - new->storage = storage; + new = g_slice_alloc (sizeof (rspamd_lru_hash_t)); + new->elements = NULL; new->maxage = maxage; new->maxsize = maxsize; new->value_destroy = value_destroy; new->key_destroy = key_destroy; - new->q = g_queue_new (); - new->insert_func = insert_func; - new->lookup_func = lookup_func; - new->delete_func = delete_func; - new->destroy_func = NULL; return new; } @@ -439,22 +125,26 @@ rspamd_lru_hash_new_full (GHashFunc hash_func, gpointer rspamd_lru_hash_lookup (rspamd_lru_hash_t *hash, gpointer key, time_t now) { - rspamd_lru_element_t *res; + rspamd_lru_element_t *res, *tmp; - if ((res = hash->lookup_func (hash->storage, key)) != NULL) { + HASH_FIND_STR (hash->elements, key, res); + if (res != NULL) { if (res->ttl != 0) { if (now - res->store_time > res->ttl) { - hash->delete_func (hash->storage, key); + rspamd_lru_hash_destroy_node (res); return NULL; } } if (hash->maxage > 0) { if (now - res->store_time > hash->maxage) { - res = g_queue_peek_tail (hash->q); /* Expire elements from queue tail */ - while (res != NULL && now - res->store_time > hash->maxage) { - hash->delete_func (hash->storage, res->key); - res = g_queue_peek_tail (hash->q); + HASH_ITER (hh, hash->elements, res, tmp) { + if (now - res->store_time > hash->maxage) { + rspamd_lru_hash_destroy_node (res); + } + else { + break; + } } return NULL; @@ -475,52 +165,47 @@ void rspamd_lru_hash_insert (rspamd_lru_hash_t *hash, gpointer key, gpointer value, time_t now, guint ttl) { - rspamd_lru_element_t *res; + rspamd_lru_element_t *res, *tmp; gint removed = 0; - if ((res = hash->lookup_func (hash->storage, key)) != NULL) { - hash->delete_func (hash->storage, res->key); + HASH_FIND_STR (hash->elements, key, res); + if (res != NULL) { + rspamd_lru_hash_destroy_node (res); } else { if (hash->maxsize > 0 && - (gint)g_queue_get_length (hash->q) >= hash->maxsize) { + (gint)HASH_COUNT (hash->elements) >= hash->maxsize) { /* Expire some elements */ - res = g_queue_peek_tail (hash->q); if (hash->maxage > 0) { - while (res != NULL && now - res->store_time > hash->maxage) { - if (res->key != NULL) { - hash->delete_func (hash->storage, res->key); + HASH_ITER (hh, hash->elements, res, tmp) { + if (now - res->store_time > hash->maxage) { + rspamd_lru_hash_destroy_node (res); + removed ++; } else { break; } - res = g_queue_peek_tail (hash->q); - removed++; } } if (removed == 0) { - /* Remove explicitly */ - if (res->key != NULL) { - hash->delete_func (hash->storage, res->key); - } + rspamd_lru_hash_destroy_node (hash->elements); } } } res = rspamd_lru_create_node (hash, key, value, now, ttl); - hash->insert_func (hash->storage, key, res); - g_queue_push_head (hash->q, res); - res->link = g_queue_peek_head_link (hash->q); + HASH_ADD (hh, hash->elements, key, strlen (key), res); } void rspamd_lru_hash_destroy (rspamd_lru_hash_t *hash) { - if (hash->destroy_func) { - hash->destroy_func (hash->storage); + rspamd_lru_element_t *res, *tmp; + + HASH_ITER (hh, hash->elements, res, tmp) { + rspamd_lru_hash_destroy_node (res); } - g_queue_free (hash->q); - g_free (hash); + g_slice_free1 (sizeof (rspamd_lru_hash_t), hash); } /* diff --git a/src/libutil/hash.h b/src/libutil/hash.h index 7f4f82b43..b7bf9ff18 100644 --- a/src/libutil/hash.h +++ b/src/libutil/hash.h @@ -7,111 +7,10 @@ #ifndef RSPAMD_HASH_H #define RSPAMD_HASH_H -#include "mem_pool.h" +#include "config.h" -struct rspamd_hash_node { - gpointer key; - gpointer value; - guint key_hash; - struct rspamd_hash_node *next; -}; - -typedef struct rspamd_hash_s { - gint size; - gint nnodes; - struct rspamd_hash_node **nodes; - - GHashFunc hash_func; - GEqualFunc key_equal_func; - gint shared; - rspamd_mempool_rwlock_t *lock; - rspamd_mempool_t *pool; -} rspamd_hash_t; - -typedef void (*lru_cache_insert_func)(gpointer storage, gpointer key, - gpointer value); -typedef gpointer (*lru_cache_lookup_func)(gpointer storage, gpointer key); -typedef gboolean (*lru_cache_delete_func)(gpointer storage, gpointer key); -typedef void (*lru_cache_destroy_func)(gpointer storage); - -typedef struct rspamd_lru_hash_s { - gint maxsize; - gint maxage; - GDestroyNotify value_destroy; - GDestroyNotify key_destroy; - GQueue *q; - gpointer storage; - lru_cache_insert_func insert_func; - lru_cache_lookup_func lookup_func; - lru_cache_delete_func delete_func; - lru_cache_destroy_func destroy_func; -} rspamd_lru_hash_t; - -typedef struct rspamd_lru_element_s { - gpointer data; - gpointer key; - time_t store_time; - guint ttl; - rspamd_lru_hash_t *hash; - GList *link; -} rspamd_lru_element_t; - - -#define rspamd_hash_size(x) (x)->nnodes - -/** - * Create new hash in specified pool - * @param pool memory pool object - * @param hash_func pointer to hash function - * @param key_equal_func pointer to function for comparing keys - * @return new rspamd_hash object - */ -rspamd_hash_t * rspamd_hash_new (rspamd_mempool_t *pool, - GHashFunc hash_func, - GEqualFunc key_equal_func); - -/** - * Create new hash in specified pool using shared memory - * @param pool memory pool object - * @param hash_func pointer to hash function - * @param key_equal_func pointer to function for comparing keys - * @return new rspamd_hash object - */ -rspamd_hash_t * rspamd_hash_new_shared (rspamd_mempool_t *pool, - GHashFunc hash_func, - GEqualFunc key_equal_func, - gint size); - -/** - * Insert item in hash - * @param hash hash object - * @param key key to insert - * @param value value of key - */ -void rspamd_hash_insert (rspamd_hash_t *hash, gpointer key, gpointer value); - -/** - * Remove item from hash - * @param hash hash object - * @param key key to delete - */ -gboolean rspamd_hash_remove (rspamd_hash_t *hash, gpointer key); - -/** - * Lookup item from hash - * @param hash hash object - * @param key key to find - * @return value of key or NULL if key is not found - */ -gpointer rspamd_hash_lookup (rspamd_hash_t *hash, gpointer key); - -/** - * Iterate throught hash - * @param hash hash object - * @param func user's function that would be called for each key/value pair - * @param user_data pointer to user's data that would be passed to user's function - */ -void rspamd_hash_foreach (rspamd_hash_t *hash, GHFunc func, gpointer user_data); +struct rspamd_lru_hash_s; +typedef struct rspamd_lru_hash_s rspamd_lru_hash_t; /** * Create new lru hash @@ -121,31 +20,12 @@ void rspamd_hash_foreach (rspamd_hash_t *hash, GHFunc func, gpointer user_data); * @param key_equal_func pointer to function for comparing keys * @return new rspamd_hash object */ -rspamd_lru_hash_t * rspamd_lru_hash_new (GHashFunc hash_func, - GEqualFunc key_equal_func, +rspamd_lru_hash_t * rspamd_lru_hash_new ( gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy); -/** - * Create new lru hash with custom storage - * @param maxsize maximum elements in a hash - * @param maxage maximum age of elemnt - * @param hash_func pointer to hash function - * @param key_equal_func pointer to function for comparing keys - * @return new rspamd_hash object - */ -rspamd_lru_hash_t * rspamd_lru_hash_new_full (GHashFunc hash_func, - GEqualFunc key_equal_func, - gint maxsize, - gint maxage, - GDestroyNotify key_destroy, - GDestroyNotify value_destroy, - gpointer storage, - lru_cache_insert_func insert_func, - lru_cache_lookup_func lookup_func, - lru_cache_delete_func delete_func); /** * Lookup item from hash * @param hash hash object -- 2.39.5