]> source.dussan.org Git - rspamd.git/commitdiff
Rework rspamd hash.
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 24 Aug 2014 16:03:24 +0000 (17:03 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 24 Aug 2014 16:03:24 +0000 (17:03 +0100)
src/libutil/hash.c
src/libutil/hash.h

index 0e8b1646ecf50f3b7c5043b07e37ec91731a9eb6..7e4e7f27e621726a0346d9804fbb25797807cf26 100644 (file)
 
 #include "config.h"
 #include "hash.h"
+#include "uthash_strcase.h"
+#include "utlist.h"
 
-#define HASH_TABLE_MIN_SIZE 19
-#define HASH_TABLE_MAX_SIZE 13845163
-
-/*
- * Performs a lookup in the hash table.  Virtually all hash operations
- * will use this function internally.
+/**
+ * LRU hashing
  */
-static inline struct rspamd_hash_node **
-rspamd_hash_lookup_node (rspamd_hash_t * hash,
-       gconstpointer key,
-       guint * hash_return)
-{
-       struct rspamd_hash_node **node_ptr, *node;
-       guint hash_value;
-       hash_value = (*hash->hash_func)(key);
 
-       if (hash->shared) {
-               rspamd_mempool_rlock_rwlock (hash->lock);
-       }
-       node_ptr = &hash->nodes[hash_value % hash->size];
-
-       if (hash_return)
-               *hash_return = hash_value;
-
-       /* Hash table lookup needs to be fast.
-        *  We therefore remove the extra conditional of testing
-        *  whether to call the key_equal_func or not from
-        *  the inner loop.
-        *
-        *  Additional optimisation: first check if our full hash
-        *  values are equal so we can avoid calling the full-blown
-        *  key equality function in most cases.
-        */
-       if (hash->key_equal_func) {
-               while ((node = *node_ptr)) {
-                       if (node->key_hash == hash_value &&
-                               hash->key_equal_func (node->key, key)) {
-                               break;
-                       }
-                       node_ptr = &(*node_ptr)->next;
-               }
-       }
-       else {
-               while ((node = *node_ptr)) {
-                       if (node->key == key) {
-                               break;
-                       }
-                       node_ptr = &(*node_ptr)->next;
-               }
-       }
-       if (hash->shared) {
-               rspamd_mempool_runlock_rwlock (hash->lock);
-       }
-       return node_ptr;
-}
+typedef struct rspamd_lru_element_s {
+       gpointer data;
+       gpointer key;
+       time_t store_time;
+       guint ttl;
+       rspamd_lru_hash_t *hash;
 
-/*
- * Removes a node from the hash table and updates the node count.
- * No table resize is performed.
- */
-static void
-rspamd_hash_remove_node (rspamd_hash_t * hash,
-       struct rspamd_hash_node ***node_ptr_ptr)
-{
-       struct rspamd_hash_node **node_ptr, *node;
+       UT_hash_handle hh;
+} rspamd_lru_element_t;
 
-       if (hash->shared) {
-               rspamd_mempool_wlock_rwlock (hash->lock);
-       }
-       node_ptr = *node_ptr_ptr;
-       node = *node_ptr;
+struct rspamd_lru_hash_s {
+       gint maxsize;
+       gint maxage;
+       GDestroyNotify value_destroy;
+       GDestroyNotify key_destroy;
 
-       *node_ptr = node->next;
+       rspamd_lru_element_t *elements;
+};
 
-       hash->nnodes--;
-       if (hash->shared) {
-               rspamd_mempool_wunlock_rwlock (hash->lock);
-       }
-}
 
-/*
- * Resizes the hash table to the optimal size based on the number of
- * nodes currently held.
- */
 static void
-rspamd_hash_resize (rspamd_hash_t * hash)
-{
-       struct rspamd_hash_node **new_nodes;
-       struct rspamd_hash_node *node, *next;
-       guint hash_val;
-       gint new_size, i;
-
-       new_size = g_spaced_primes_closest (hash->nnodes);
-       new_size = CLAMP (new_size, HASH_TABLE_MIN_SIZE, HASH_TABLE_MAX_SIZE);
-
-       if (hash->shared) {
-               new_nodes =
-                       rspamd_mempool_alloc_shared (hash->pool,
-                               sizeof (struct rspamd_hash_node *) * new_size);
-       }
-       else {
-               new_nodes =
-                       rspamd_mempool_alloc (hash->pool,
-                               sizeof (struct rspamd_hash_node *) * new_size);
-       }
-
-       if (hash->shared) {
-               rspamd_mempool_wlock_rwlock (hash->lock);
-       }
-
-       for (i = 0; i < hash->size; i++) {
-               for (node = hash->nodes[i]; node; node = next) {
-                       next = node->next;
-                       hash_val = node->key_hash % new_size;
-                       node->next = new_nodes[hash_val];
-                       new_nodes[hash_val] = node;
-               }
-       }
-
-       hash->nodes = new_nodes;
-       hash->size = new_size;
-
-       if (hash->shared) {
-               rspamd_mempool_wunlock_rwlock (hash->lock);
-       }
-}
-
-/*
- * Resizes the hash table, if needed.
- */
-static inline void
-rspamd_hash_maybe_resize (rspamd_hash_t * hash)
-{
-       gint nnodes = hash->nnodes;
-       gint size = hash->size;
-
-       if ((size >= 3 * nnodes &&
-               size > HASH_TABLE_MIN_SIZE) ||
-               (3 * size <= nnodes && size < HASH_TABLE_MAX_SIZE)) {
-               rspamd_hash_resize (hash);
-       }
-}
-
-/* Create new hash in specified pool */
-rspamd_hash_t *
-rspamd_hash_new (rspamd_mempool_t * pool,
-       GHashFunc hash_func,
-       GEqualFunc key_equal_func)
-{
-       rspamd_hash_t *hash;
-
-       hash = rspamd_mempool_alloc (pool, sizeof (rspamd_hash_t));
-       hash->size = HASH_TABLE_MIN_SIZE;
-       hash->nnodes = 0;
-       hash->hash_func = hash_func ? hash_func : g_direct_hash;
-       hash->key_equal_func = key_equal_func;
-       hash->nodes = rspamd_mempool_alloc0 (pool,
-                       sizeof (struct rspamd_hash_node *) * hash->size);
-       hash->shared = 0;
-       hash->pool = pool;
-
-       return hash;
-}
-
-/*
- * Create new hash in specified pool using shared memory
- */
-rspamd_hash_t *
-rspamd_hash_new_shared (rspamd_mempool_t * pool,
-       GHashFunc hash_func,
-       GEqualFunc key_equal_func,
-       gint size)
-{
-       rspamd_hash_t *hash;
-
-       hash = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_hash_t));
-       hash->size = size;
-       hash->nnodes = 0;
-       hash->hash_func = hash_func ? hash_func : g_direct_hash;
-       hash->key_equal_func = key_equal_func;
-       hash->nodes =
-               rspamd_mempool_alloc0_shared (pool,
-                       sizeof (struct rspamd_hash_node *) * hash->size);
-       hash->shared = 1;
-       /* Get mutex from pool for locking on insert/remove operations */
-       hash->lock = rspamd_mempool_get_rwlock (pool);
-       hash->pool = pool;
-
-       return hash;
-}
-
-/*
- * Insert item in hash
- */
-void
-rspamd_hash_insert (rspamd_hash_t * hash, gpointer key, gpointer value)
-{
-       struct rspamd_hash_node **node_ptr, *node;
-       guint key_hash;
-
-       g_return_if_fail (hash != NULL);
-       node_ptr = rspamd_hash_lookup_node (hash, key, &key_hash);
-
-       if (hash->shared) {
-               rspamd_mempool_wlock_rwlock (hash->lock);
-       }
-       if ((node = *node_ptr)) {
-               node->key = key;
-               node->value = value;
-       }
-       else {
-               if (hash->shared) {
-                       node =
-                               rspamd_mempool_alloc_shared (hash->pool,
-                                       sizeof (struct rspamd_hash_node));
-               }
-               else {
-                       node =
-                               rspamd_mempool_alloc (hash->pool,
-                                       sizeof (struct rspamd_hash_node));
-               }
-
-               node->key = key;
-               node->value = value;
-               node->key_hash = key_hash;
-               node->next = NULL;
-
-               *node_ptr = node;
-               hash->nnodes++;
-       }
-       if (hash->shared) {
-               rspamd_mempool_wunlock_rwlock (hash->lock);
-       }
-
-       if (!hash->shared) {
-               rspamd_hash_maybe_resize (hash);
-       }
-}
-
-/*
- * Remove item from hash
- */
-gboolean
-rspamd_hash_remove (rspamd_hash_t * hash, gpointer key)
-{
-       struct rspamd_hash_node **node_ptr;
-
-       g_return_val_if_fail (hash != NULL, FALSE);
-
-       node_ptr = rspamd_hash_lookup_node (hash, key, NULL);
-       if (*node_ptr == NULL)
-               return FALSE;
-
-       rspamd_hash_remove_node (hash, &node_ptr);
-       rspamd_hash_maybe_resize (hash);
-
-       return TRUE;
-}
-
-/*
- * Lookup item from hash
- */
-gpointer
-rspamd_hash_lookup (rspamd_hash_t * hash, gpointer key)
-{
-       struct rspamd_hash_node *node;
-       g_return_val_if_fail (hash != NULL, NULL);
-
-       node = *rspamd_hash_lookup_node (hash, key, NULL);
-
-       return node ? node->value : NULL;
-}
-
-/*
- * Iterate throught hash
- */
-void
-rspamd_hash_foreach (rspamd_hash_t * hash, GHFunc func, gpointer user_data)
+rspamd_lru_hash_destroy_node (gpointer v)
 {
-       struct rspamd_hash_node *node;
-       gint i;
+       rspamd_lru_element_t *node = v;
+       rspamd_lru_hash_t *hash;
 
-       g_return_if_fail (hash != NULL);
-       g_return_if_fail (func != NULL);
+       hash = node->hash;
 
-       if (hash->shared) {
-               rspamd_mempool_rlock_rwlock (hash->lock);
+       HASH_DELETE(hh, hash->elements, node);
+       if (hash->value_destroy) {
+               hash->value_destroy (node->data);
        }
-       for (i = 0; i < hash->size; i++) {
-               for (node = hash->nodes[i]; node; node = node->next) {
-                       (*func)(node->key, node->value, user_data);
-               }
-       }
-       if (hash->shared) {
-               rspamd_mempool_runlock_rwlock (hash->lock);
+       if (hash->key_destroy) {
+               hash->key_destroy (node->key);
        }
-}
 
-/**
- * LRU hashing
- */
-
-static void
-rspamd_lru_hash_destroy_node (gpointer v)
-{
-       rspamd_lru_element_t *node = v;
-
-       if (node->hash->value_destroy) {
-               node->hash->value_destroy (node->data);
-       }
-       g_queue_delete_link (node->hash->q, node->link);
        g_slice_free1 (sizeof (rspamd_lru_element_t), node);
 }
 
@@ -367,8 +98,7 @@ rspamd_lru_create_node (rspamd_lru_hash_t *hash,
  * @return new rspamd_hash object
  */
 rspamd_lru_hash_t *
-rspamd_lru_hash_new (GHashFunc hash_func,
-       GEqualFunc key_equal_func,
+rspamd_lru_hash_new (
        gint maxsize,
        gint maxage,
        GDestroyNotify key_destroy,
@@ -376,56 +106,12 @@ rspamd_lru_hash_new (GHashFunc hash_func,
 {
        rspamd_lru_hash_t *new;
 
-       new = g_malloc (sizeof (rspamd_lru_hash_t));
-       new->storage = g_hash_table_new_full (hash_func,
-                       key_equal_func,
-                       key_destroy,
-                       rspamd_lru_hash_destroy_node);
-       new->maxage = maxage;
-       new->maxsize = maxsize;
-       new->value_destroy = value_destroy;
-       new->key_destroy = NULL;
-       new->q = g_queue_new ();
-       new->insert_func = (lru_cache_insert_func)g_hash_table_replace;
-       new->lookup_func = (lru_cache_lookup_func)g_hash_table_lookup;
-       new->delete_func = (lru_cache_delete_func)g_hash_table_remove;
-       new->destroy_func = (lru_cache_destroy_func)g_hash_table_destroy;
-
-       return new;
-}
-/**
- * Create new lru hash with custom storage
- * @param maxsize maximum elements in a hash
- * @param maxage maximum age of elemnt
- * @param hash_func pointer to hash function
- * @param key_equal_func pointer to function for comparing keys
- * @return new rspamd_hash object
- */
-rspamd_lru_hash_t *
-rspamd_lru_hash_new_full (GHashFunc hash_func,
-       GEqualFunc key_equal_func,
-       gint maxsize,
-       gint maxage,
-       GDestroyNotify key_destroy,
-       GDestroyNotify value_destroy,
-       gpointer storage,
-       lru_cache_insert_func insert_func,
-       lru_cache_lookup_func lookup_func,
-       lru_cache_delete_func delete_func)
-{
-       rspamd_lru_hash_t *new;
-
-       new = g_malloc (sizeof (rspamd_lru_hash_t));
-       new->storage = storage;
+       new = g_slice_alloc (sizeof (rspamd_lru_hash_t));
+       new->elements = NULL;
        new->maxage = maxage;
        new->maxsize = maxsize;
        new->value_destroy = value_destroy;
        new->key_destroy = key_destroy;
-       new->q = g_queue_new ();
-       new->insert_func = insert_func;
-       new->lookup_func = lookup_func;
-       new->delete_func = delete_func;
-       new->destroy_func = NULL;
 
        return new;
 }
@@ -439,22 +125,26 @@ rspamd_lru_hash_new_full (GHashFunc hash_func,
 gpointer
 rspamd_lru_hash_lookup (rspamd_lru_hash_t *hash, gpointer key, time_t now)
 {
-       rspamd_lru_element_t *res;
+       rspamd_lru_element_t *res, *tmp;
 
-       if ((res = hash->lookup_func (hash->storage, key)) != NULL) {
+       HASH_FIND_STR (hash->elements, key, res);
+       if (res != NULL) {
                if (res->ttl != 0) {
                        if (now - res->store_time > res->ttl) {
-                               hash->delete_func (hash->storage, key);
+                               rspamd_lru_hash_destroy_node (res);
                                return NULL;
                        }
                }
                if (hash->maxage > 0) {
                        if (now - res->store_time > hash->maxage) {
-                               res = g_queue_peek_tail (hash->q);
                                /* Expire elements from queue tail */
-                               while (res != NULL && now - res->store_time > hash->maxage) {
-                                       hash->delete_func (hash->storage, res->key);
-                                       res = g_queue_peek_tail (hash->q);
+                               HASH_ITER (hh, hash->elements, res, tmp) {
+                                       if (now - res->store_time > hash->maxage) {
+                                               rspamd_lru_hash_destroy_node (res);
+                                       }
+                                       else {
+                                               break;
+                                       }
                                }
 
                                return NULL;
@@ -475,52 +165,47 @@ void
 rspamd_lru_hash_insert (rspamd_lru_hash_t *hash, gpointer key, gpointer value,
        time_t now, guint ttl)
 {
-       rspamd_lru_element_t *res;
+       rspamd_lru_element_t *res, *tmp;
        gint removed = 0;
 
-       if ((res = hash->lookup_func (hash->storage, key)) != NULL) {
-               hash->delete_func (hash->storage, res->key);
+       HASH_FIND_STR (hash->elements, key, res);
+       if (res != NULL) {
+               rspamd_lru_hash_destroy_node (res);
        }
        else {
                if (hash->maxsize > 0 &&
-                       (gint)g_queue_get_length (hash->q) >= hash->maxsize) {
+                       (gint)HASH_COUNT (hash->elements) >= hash->maxsize) {
                        /* Expire some elements */
-                       res = g_queue_peek_tail (hash->q);
                        if (hash->maxage > 0) {
-                               while (res != NULL && now - res->store_time > hash->maxage) {
-                                       if (res->key != NULL) {
-                                               hash->delete_func (hash->storage, res->key);
+                               HASH_ITER (hh, hash->elements, res, tmp) {
+                                       if (now - res->store_time > hash->maxage) {
+                                               rspamd_lru_hash_destroy_node (res);
+                                               removed ++;
                                        }
                                        else {
                                                break;
                                        }
-                                       res = g_queue_peek_tail (hash->q);
-                                       removed++;
                                }
                        }
                        if (removed == 0) {
-                               /* Remove explicitly */
-                               if (res->key != NULL) {
-                                       hash->delete_func (hash->storage, res->key);
-                               }
+                               rspamd_lru_hash_destroy_node (hash->elements);
                        }
                }
        }
 
        res = rspamd_lru_create_node (hash, key, value, now, ttl);
-       hash->insert_func (hash->storage, key, res);
-       g_queue_push_head (hash->q, res);
-       res->link = g_queue_peek_head_link (hash->q);
+       HASH_ADD (hh, hash->elements, key, strlen (key), res);
 }
 
 void
 rspamd_lru_hash_destroy (rspamd_lru_hash_t *hash)
 {
-       if (hash->destroy_func) {
-               hash->destroy_func (hash->storage);
+       rspamd_lru_element_t *res, *tmp;
+
+       HASH_ITER (hh, hash->elements, res, tmp) {
+               rspamd_lru_hash_destroy_node (res);
        }
-       g_queue_free (hash->q);
-       g_free (hash);
+       g_slice_free1 (sizeof (rspamd_lru_hash_t), hash);
 }
 
 /*
index 7f4f82b43b947ef4470ea94a5390af6b3e1de40d..b7bf9ff18f797f360e7c6f05923a79a75e25ffbb 100644 (file)
 #ifndef RSPAMD_HASH_H
 #define RSPAMD_HASH_H
 
-#include "mem_pool.h"
+#include "config.h"
 
-struct rspamd_hash_node {
-       gpointer key;
-       gpointer value;
-       guint key_hash;
-       struct rspamd_hash_node *next;
-};
-
-typedef struct rspamd_hash_s {
-       gint size;
-       gint nnodes;
-       struct rspamd_hash_node **nodes;
-
-       GHashFunc hash_func;
-       GEqualFunc key_equal_func;
-       gint shared;
-       rspamd_mempool_rwlock_t *lock;
-       rspamd_mempool_t *pool;
-} rspamd_hash_t;
-
-typedef void (*lru_cache_insert_func)(gpointer storage, gpointer key,
-       gpointer value);
-typedef gpointer (*lru_cache_lookup_func)(gpointer storage, gpointer key);
-typedef gboolean (*lru_cache_delete_func)(gpointer storage, gpointer key);
-typedef void (*lru_cache_destroy_func)(gpointer storage);
-
-typedef struct rspamd_lru_hash_s {
-       gint maxsize;
-       gint maxage;
-       GDestroyNotify value_destroy;
-       GDestroyNotify key_destroy;
-       GQueue *q;
-       gpointer storage;
-       lru_cache_insert_func insert_func;
-       lru_cache_lookup_func lookup_func;
-       lru_cache_delete_func delete_func;
-       lru_cache_destroy_func destroy_func;
-} rspamd_lru_hash_t;
-
-typedef struct rspamd_lru_element_s {
-       gpointer data;
-       gpointer key;
-       time_t store_time;
-       guint ttl;
-       rspamd_lru_hash_t *hash;
-       GList *link;
-} rspamd_lru_element_t;
-
-
-#define rspamd_hash_size(x) (x)->nnodes
-
-/**
- * Create new hash in specified pool
- * @param pool memory pool object
- * @param hash_func pointer to hash function
- * @param key_equal_func pointer to function for comparing keys
- * @return new rspamd_hash object
- */
-rspamd_hash_t * rspamd_hash_new (rspamd_mempool_t *pool,
-       GHashFunc hash_func,
-       GEqualFunc key_equal_func);
-
-/**
- * Create new hash in specified pool using shared memory
- * @param pool memory pool object
- * @param hash_func pointer to hash function
- * @param key_equal_func pointer to function for comparing keys
- * @return new rspamd_hash object
- */
-rspamd_hash_t * rspamd_hash_new_shared (rspamd_mempool_t *pool,
-       GHashFunc hash_func,
-       GEqualFunc key_equal_func,
-       gint size);
-
-/**
- * Insert item in hash
- * @param hash hash object
- * @param key key to insert
- * @param value value of key
- */
-void rspamd_hash_insert (rspamd_hash_t *hash, gpointer key, gpointer value);
-
-/**
- * Remove item from hash
- * @param hash hash object
- * @param key key to delete
- */
-gboolean rspamd_hash_remove (rspamd_hash_t *hash, gpointer key);
-
-/**
- * Lookup item from hash
- * @param hash hash object
- * @param key key to find
- * @return value of key or NULL if key is not found
- */
-gpointer rspamd_hash_lookup (rspamd_hash_t *hash, gpointer key);
-
-/**
- * Iterate throught hash
- * @param hash hash object
- * @param func user's function that would be called for each key/value pair
- * @param user_data pointer to user's data that would be passed to user's function
- */
-void rspamd_hash_foreach (rspamd_hash_t *hash, GHFunc func, gpointer user_data);
+struct rspamd_lru_hash_s;
+typedef struct rspamd_lru_hash_s rspamd_lru_hash_t;
 
 /**
  * Create new lru hash
@@ -121,31 +20,12 @@ void rspamd_hash_foreach (rspamd_hash_t *hash, GHFunc func, gpointer user_data);
  * @param key_equal_func pointer to function for comparing keys
  * @return new rspamd_hash object
  */
-rspamd_lru_hash_t * rspamd_lru_hash_new (GHashFunc hash_func,
-       GEqualFunc key_equal_func,
+rspamd_lru_hash_t * rspamd_lru_hash_new (
        gint maxsize,
        gint maxage,
        GDestroyNotify key_destroy,
        GDestroyNotify value_destroy);
 
-/**
- * Create new lru hash with custom storage
- * @param maxsize maximum elements in a hash
- * @param maxage maximum age of elemnt
- * @param hash_func pointer to hash function
- * @param key_equal_func pointer to function for comparing keys
- * @return new rspamd_hash object
- */
-rspamd_lru_hash_t * rspamd_lru_hash_new_full (GHashFunc hash_func,
-       GEqualFunc key_equal_func,
-       gint maxsize,
-       gint maxage,
-       GDestroyNotify key_destroy,
-       GDestroyNotify value_destroy,
-       gpointer storage,
-       lru_cache_insert_func insert_func,
-       lru_cache_lookup_func lookup_func,
-       lru_cache_delete_func delete_func);
 /**
  * Lookup item from hash
  * @param hash hash object