diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-01-27 19:15:51 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-01-27 19:15:51 +0300 |
commit | a450d0faa8851a7df8dbb52788f99fe216f57c3d (patch) | |
tree | ed2f45a6fd083e7e3b833bbdd576874d122c5f69 /src | |
parent | ec5b7a84cfd158b8b6b5714b47c48028a9c29a6a (diff) | |
download | rspamd-a450d0faa8851a7df8dbb52788f99fe216f57c3d.tar.gz rspamd-a450d0faa8851a7df8dbb52788f99fe216f57c3d.zip |
* Add new hash for storing hash data in shared memory
* Add rwlocks implementation (primitive) in memory pool library
Diffstat (limited to 'src')
-rw-r--r-- | src/hash.c | 293 | ||||
-rw-r--r-- | src/hash.h | 62 | ||||
-rw-r--r-- | src/mem_pool.c | 64 | ||||
-rw-r--r-- | src/mem_pool.h | 27 | ||||
-rw-r--r-- | src/statfile.c | 37 | ||||
-rw-r--r-- | src/statfile.h | 5 | ||||
-rw-r--r-- | src/url.h | 6 |
7 files changed, 466 insertions, 28 deletions
diff --git a/src/hash.c b/src/hash.c new file mode 100644 index 000000000..2be705999 --- /dev/null +++ b/src/hash.c @@ -0,0 +1,293 @@ +/* + * ===================================================================================== + * + * Filename: hash.c + * + * Description: Hash table implementation that uses memory pools from mem_pool library + * + * Created: 27.01.2009 16:43:16 + * Compiler: gcc + * + * Author: Vsevolod Stakhov + * Company: Rambler + * + * ===================================================================================== + */ + +#include <sys/types.h> +#include <stdlib.h> +#include "hash.h" + +#define HASH_TABLE_MIN_SIZE 20 +#define HASH_TABLE_MAX_SIZE 13845163 + +/* + * Performs a lookup in the hash table. Virtually all hash operations + * will use this function internally. + */ +static inline struct rspamd_hash_node ** +rspamd_hash_lookup_node (rspamd_hash_t *hash, gconstpointer key, guint *hash_return) +{ + struct rspamd_hash_node **node_ptr, *node; + guint hash_value; + hash_value = (* hash->hash_func) (key); + + if (hash->shared) { + memory_pool_rlock_rwlock (hash->lock); + } + node_ptr = &hash->nodes[hash_value % hash->size]; + + if (hash_return) + *hash_return = hash_value; + + /* Hash table lookup needs to be fast. + * We therefore remove the extra conditional of testing + * whether to call the key_equal_func or not from + * the inner loop. + * + * Additional optimisation: first check if our full hash + * values are equal so we can avoid calling the full-blown + * key equality function in most cases. + */ + if (hash->key_equal_func) { + while ((node = *node_ptr)) { + if (node->key_hash == hash_value && hash->key_equal_func (node->key, key)) { + break; + } + node_ptr = &(*node_ptr)->next; + } + } else { + while ((node = *node_ptr)) { + if (node->key == key) { + break; + } + node_ptr = &(*node_ptr)->next; + } + } + if (hash->shared) { + memory_pool_runlock_rwlock (hash->lock); + } + return node_ptr; +} + +/* + * Removes a node from the hash table and updates the node count. + * No table resize is performed. + */ +static void +rspamd_hash_remove_node (rspamd_hash_t *hash, struct rspamd_hash_node ***node_ptr_ptr) +{ + struct rspamd_hash_node **node_ptr, *node; + + if (hash->shared) { + memory_pool_wlock_rwlock (hash->lock); + } + node_ptr = *node_ptr_ptr; + node = *node_ptr; + + *node_ptr = node->next; + + hash->nnodes--; + if (hash->shared) { + memory_pool_wunlock_rwlock (hash->lock); + } +} + +/* + * Resizes the hash table to the optimal size based on the number of + * nodes currently held. + */ +static void +rspamd_hash_resize (rspamd_hash_t *hash) +{ + struct rspamd_hash_node **new_nodes; + struct rspamd_hash_node *node, *next; + guint hash_val; + gint new_size, i; + + new_size = g_spaced_primes_closest (hash->nnodes); + new_size = CLAMP (new_size, HASH_TABLE_MIN_SIZE, HASH_TABLE_MAX_SIZE); + + new_nodes = memory_pool_alloc (hash->pool, sizeof (struct rspamd_hash_node *) * new_size); + + if (hash->shared) { + memory_pool_wlock_rwlock (hash->lock); + } + + for (i = 0; i < hash->size; i++) { + for (node = hash->nodes[i]; node; node = next) { + next = node->next; + hash_val = node->key_hash % new_size; + node->next = new_nodes[hash_val]; + new_nodes[hash_val] = node; + } + } + + hash->nodes = new_nodes; + hash->size = new_size; + + if (hash->shared) { + memory_pool_wunlock_rwlock (hash->lock); + } +} + +/* + * Resizes the hash table, if needed. + */ +static inline void +rspamd_hash_maybe_resize (rspamd_hash_t *hash) +{ + gint nnodes = hash->nnodes; + gint size = hash->size; + + if ((size >= 3 * nnodes && size > HASH_TABLE_MIN_SIZE) || + (3 * size <= nnodes && size < HASH_TABLE_MAX_SIZE)) { + rspamd_hash_resize (hash); + } +} + +/* Create new hash in specified pool */ +rspamd_hash_t* +rspamd_hash_new (memory_pool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func) +{ + rspamd_hash_t* hash; + + hash = memory_pool_alloc (pool, sizeof (rspamd_hash_t)); + hash->size = HASH_TABLE_MIN_SIZE; + hash->nnodes = 0; + hash->hash_func = hash_func ? hash_func : g_direct_hash; + hash->key_equal_func = key_equal_func; + hash->nodes = memory_pool_alloc (pool, sizeof (struct rspamd_hash_node*) * hash->size); + hash->shared = 0; + hash->pool = pool; + + return hash; +} + +/* + * Create new hash in specified pool using shared memory + */ +rspamd_hash_t* +rspamd_hash_new_shared (memory_pool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func) +{ + rspamd_hash_t* hash; + + hash = memory_pool_alloc_shared (pool, sizeof (rspamd_hash_t)); + hash->size = HASH_TABLE_MIN_SIZE; + hash->nnodes = 0; + hash->hash_func = hash_func ? hash_func : g_direct_hash; + hash->key_equal_func = key_equal_func; + hash->nodes = memory_pool_alloc (pool, sizeof (struct rspamd_hash_node*) * hash->size); + hash->shared = 1; + /* Get mutex from pool for locking on insert/remove operations */ + hash->lock = memory_pool_get_rwlock (pool); + hash->pool = pool; + + return hash; +} + +/* + * Insert item in hash + */ +void +rspamd_hash_insert (rspamd_hash_t *hash, gpointer key, gpointer value) +{ + struct rspamd_hash_node **node_ptr, *node; + guint key_hash; + + g_return_if_fail (hash != NULL); + node_ptr = rspamd_hash_lookup_node (hash, key, &key_hash); + + if (hash->shared) { + memory_pool_wlock_rwlock (hash->lock); + } + if ((node = *node_ptr)) { + node->key = key; + node->value = value; + } + else { + if (hash->shared) { + node = memory_pool_alloc_shared (hash->pool, sizeof (struct rspamd_hash_node)); + } + else { + node = memory_pool_alloc (hash->pool, sizeof (struct rspamd_hash_node)); + } + + node->key = key; + node->value = value; + node->key_hash = key_hash; + node->next = NULL; + + *node_ptr = node; + hash->nnodes ++; + rspamd_hash_maybe_resize (hash); + + } + if (hash->shared) { + memory_pool_wunlock_rwlock (hash->lock); + } + +} + +/* + * Remove item from hash + */ +gboolean +rspamd_hash_remove (rspamd_hash_t *hash, gpointer key) +{ + struct rspamd_hash_node **node_ptr; + + g_return_val_if_fail (hash != NULL, FALSE); + + node_ptr = rspamd_hash_lookup_node (hash, key, NULL); + if (*node_ptr == NULL) + return FALSE; + + rspamd_hash_remove_node (hash, &node_ptr); + rspamd_hash_maybe_resize (hash); + + return TRUE; +} + +/* + * Lookup item from hash + */ +gpointer +rspamd_hash_lookup (rspamd_hash_t *hash, gpointer key) +{ + struct rspamd_hash_node *node; + g_return_val_if_fail (hash != NULL, NULL); + + node = *rspamd_hash_lookup_node (hash, key, NULL); + + return node ? node->value : NULL; +} + +/* + * Iterate throught hash + */ +void +rspamd_hash_foreach (rspamd_hash_t *hash, GHFunc func, gpointer user_data) +{ + struct rspamd_hash_node *node; + gint i; + + g_return_if_fail (hash != NULL); + g_return_if_fail (func != NULL); + + if (hash->shared) { + memory_pool_rlock_rwlock (hash->lock); + } + for (i = 0; i < hash->size; i++) { + for (node = hash->nodes[i]; node; node = node->next) { + (* func) (node->key, node->value, user_data); + } + } + if (hash->shared) { + memory_pool_runlock_rwlock (hash->lock); + } +} + +/* + * vi:ts=4 + */ diff --git a/src/hash.h b/src/hash.h new file mode 100644 index 000000000..79931a37e --- /dev/null +++ b/src/hash.h @@ -0,0 +1,62 @@ +/* + * ===================================================================================== + * + * Filename: hash.h + * + * Description: Hash table implementation that uses memory pools from mem_pool library + * + * Created: 27.01.2009 16:31:11 + * Compiler: gcc + * + * Author: Vsevolod Stakhov + * Company: Rambler + * + * ===================================================================================== + */ + +#ifndef RSPAMD_HASH_H +#define RSPAMD_HASH_H + +#include <sys/types.h> +#include <glib.h> +#include "mem_pool.h" + +struct rspamd_hash_node { + gpointer key; + gpointer value; + guint key_hash; + struct rspamd_hash_node *next; +}; + +typedef struct rspamd_hash_s { + gint size; + gint nnodes; + struct rspamd_hash_node **nodes; + + GHashFunc hash_func; + GEqualFunc key_equal_func; + gint shared; + memory_pool_rwlock_t *lock; + memory_pool_t *pool; +} rspamd_hash_t; + +#define rspamd_hash_size(x) (x)->nnodes + +/* Create new hash in specified pool */ +rspamd_hash_t* rspamd_hash_new (memory_pool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func); +/* Create new hash in specified pool using shared memory */ +rspamd_hash_t* rspamd_hash_new_shared (memory_pool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func); +/* Insert item in hash */ +void rspamd_hash_insert (rspamd_hash_t *hash, gpointer key, gpointer value); +/* Remove item from hash */ +gboolean rspamd_hash_remove (rspamd_hash_t *hash, gpointer key); +/* Lookup item from hash */ +gpointer rspamd_hash_lookup (rspamd_hash_t *hash, gpointer key); +/* Iterate throught hash */ +void rspamd_hash_foreach (rspamd_hash_t *hash, GHFunc func, gpointer user_data); + +#endif + +/* + * vi:ts=4 + */ diff --git a/src/mem_pool.c b/src/mem_pool.c index a074de62a..582b91f0c 100644 --- a/src/mem_pool.c +++ b/src/mem_pool.c @@ -239,10 +239,9 @@ memory_pool_find_pool (memory_pool_t *pool, void *pointer) return NULL; } -static void -memory_pool_spin (gint *mutex) +static inline void +__mutex_spin (gint *mutex) { - while (!g_atomic_int_compare_and_exchange (mutex, 0, 1)) { /* lock was aqquired */ #ifdef HAVE_NANOSLEEP struct timespec ts; @@ -257,6 +256,14 @@ memory_pool_spin (gint *mutex) #if !defined(HAVE_NANOSLEEP) && !defined(HAVE_SCHED_YIELD) # error No methods to spin are defined #endif + +} + +static void +memory_pool_mutex_spin (gint *mutex) +{ + while (!g_atomic_int_compare_and_exchange (mutex, 0, 1)) { + __mutex_spin (mutex); } } @@ -271,7 +278,7 @@ memory_pool_lock_shared (memory_pool_t *pool, void *pointer) return; } - memory_pool_spin (&chain->lock); + memory_pool_mutex_spin (&chain->lock); } void memory_pool_unlock_shared (memory_pool_t *pool, void *pointer) @@ -371,7 +378,7 @@ memory_pool_get_mutex (memory_pool_t *pool) void memory_pool_lock_mutex (gint *mutex) { - memory_pool_spin (mutex); + memory_pool_mutex_spin (mutex); } void @@ -380,6 +387,53 @@ memory_pool_unlock_mutex (gint *mutex) (void)g_atomic_int_dec_and_test (mutex); } +memory_pool_rwlock_t* +memory_pool_get_rwlock (memory_pool_t *pool) +{ + memory_pool_rwlock_t *lock; + + lock = memory_pool_alloc_shared (pool, sizeof (memory_pool_rwlock_t)); + lock->__r_lock = memory_pool_get_mutex (pool); + lock->__w_lock = memory_pool_get_mutex (pool); + + return lock; +} + +void +memory_pool_rlock_rwlock (memory_pool_rwlock_t *lock) +{ + /* Spin on write lock */ + while (g_atomic_int_get (lock->__w_lock)) { + __mutex_spin (lock->__w_lock); + } + + g_atomic_int_inc (lock->__r_lock); +} + +void +memory_pool_wlock_rwlock (memory_pool_rwlock_t *lock) +{ + /* Spin on write lock first */ + memory_pool_mutex_spin (lock->__w_lock); + /* Now we have write lock set up */ + /* Wait all readers */ + while (g_atomic_int_get (lock->__r_lock)) { + __mutex_spin (lock->__r_lock); + } +} + +void +memory_pool_runlock_rwlock (memory_pool_rwlock_t *lock) +{ + (void)g_atomic_int_dec_and_test (lock->__r_lock); +} + +void +memory_pool_wunlock_rwlock (memory_pool_rwlock_t *lock) +{ + (void)g_atomic_int_dec_and_test (lock->__w_lock); +} + /* * vi:ts=4 */ diff --git a/src/mem_pool.h b/src/mem_pool.h index 22388df70..cd1af2e77 100644 --- a/src/mem_pool.h +++ b/src/mem_pool.h @@ -41,19 +41,44 @@ typedef struct memory_pool_stat_s { size_t chunks_freed; } memory_pool_stat_t; +typedef struct memory_pool_rwlock_s { + gint *__r_lock; + gint *__w_lock; +} memory_pool_rwlock_t; + +/* Allocate new memory poll */ memory_pool_t* memory_pool_new (size_t size); + +/* Get memory from pool */ void* memory_pool_alloc (memory_pool_t* pool, size_t size); +/* Get memory and set it to zero */ void* memory_pool_alloc0 (memory_pool_t* pool, size_t size); +/* Make a copy of string in pool */ char* memory_pool_strdup (memory_pool_t* pool, const char *src); -void memory_pool_add_destructor (memory_pool_t *pool, pool_destruct_func func, void *data); + +/* Allocate piece of shared memory */ void* memory_pool_alloc_shared (memory_pool_t *pool, size_t size); +/* Lock and unlock chunk of shared memory in which pointer is placed */ void memory_pool_lock_shared (memory_pool_t *pool, void *pointer); void memory_pool_unlock_shared (memory_pool_t *pool, void *pointer); + +/* Add destructor callback to pool */ +void memory_pool_add_destructor (memory_pool_t *pool, pool_destruct_func func, void *data); +/* Delete pool, free all its chunks and call destructors chain */ void memory_pool_delete (memory_pool_t *pool); + +/* Mutexes operations */ gint* memory_pool_get_mutex (memory_pool_t *pool); void memory_pool_lock_mutex (gint *mutex); void memory_pool_unlock_mutex (gint *mutex); +/* Simple rwlock implementation */ +memory_pool_rwlock_t* memory_pool_get_rwlock (memory_pool_t *pool); +void memory_pool_rlock_rwlock (memory_pool_rwlock_t *lock); +void memory_pool_wlock_rwlock (memory_pool_rwlock_t *lock); +void memory_pool_runlock_rwlock (memory_pool_rwlock_t *lock); +void memory_pool_wunlock_rwlock (memory_pool_rwlock_t *lock); + void memory_pool_stat (memory_pool_stat_t *st); /* Get optimal pool size based on page size for this system */ diff --git a/src/statfile.c b/src/statfile.c index 830bc5960..82e17a0ec 100644 --- a/src/statfile.c +++ b/src/statfile.c @@ -71,17 +71,17 @@ statfile_pool_expire (statfile_pool_t *pool) { struct expiration_data exp; - if (g_hash_table_size (pool->files) == 0) { + if (rspamd_hash_size (pool->files) == 0) { return -1; } exp.pool = pool; exp.oldest = ULLONG_MAX; exp.filename = NULL; - g_hash_table_foreach (pool->files, pool_expiration_callback, &exp); + rspamd_hash_foreach (pool->files, pool_expiration_callback, &exp); if (exp.filename) { - statfile_pool_close (pool, exp.filename); + statfile_pool_close (pool, exp.filename, TRUE); } } @@ -94,8 +94,7 @@ statfile_pool_new (size_t max_size) bzero (new, sizeof (statfile_pool_t)); new->pool = memory_pool_new (memory_pool_get_size ()); new->max = max_size; - new->files = g_hash_table_new (g_str_hash, g_str_equal); - memory_pool_add_destructor (new->pool, (pool_destruct_func)g_hash_table_destroy, new->files); + new->files = rspamd_hash_new_shared (new->pool, g_str_hash, g_str_equal); return new; } @@ -106,7 +105,7 @@ statfile_pool_open (statfile_pool_t *pool, char *filename) struct stat st; stat_file_t *new_file; - if (g_hash_table_lookup (pool->files, filename) != NULL) { + if (rspamd_hash_lookup (pool->files, filename) != NULL) { msg_info ("statfile_pool_open: file %s is already opened", filename); return 0; } @@ -154,17 +153,17 @@ statfile_pool_open (statfile_pool_t *pool, char *filename) new_file->open_time = time (NULL); new_file->access_time = new_file->open_time; new_file->lock = memory_pool_get_mutex (pool->pool); - g_hash_table_insert (pool->files, new_file->filename, new_file); + rspamd_hash_insert (pool->files, new_file->filename, new_file); return 0; } int -statfile_pool_close (statfile_pool_t *pool, char *filename) +statfile_pool_close (statfile_pool_t *pool, char *filename, gboolean remove_hash) { stat_file_t *file; - if ((file = g_hash_table_lookup (pool->files, filename)) == NULL) { + if ((file = rspamd_hash_lookup (pool->files, filename)) == NULL) { msg_info ("statfile_pool_open: file %s is not opened", filename); return -1; } @@ -180,7 +179,9 @@ statfile_pool_close (statfile_pool_t *pool, char *filename) close (file->fd); } pool->occupied -= file->len; - g_hash_table_remove (pool->files, file->filename); + if (remove_hash) { + rspamd_hash_remove (pool->files, file->filename); + } } int @@ -195,7 +196,7 @@ statfile_pool_create (statfile_pool_t *pool, char *filename, size_t blocks) static struct stat_file_block block = {0, 0, 0, 0}; int fd; - if (g_hash_table_lookup (pool->files, filename) != NULL) { + if (rspamd_hash_lookup (pool->files, filename) != NULL) { msg_info ("statfile_pool_create: file %s is already opened", filename); return 0; } @@ -230,13 +231,13 @@ pool_delete_callback (gpointer key, gpointer value, void *data) { statfile_pool_t *pool = (statfile_pool_t *)data; - statfile_pool_close (pool, (char *)key); + statfile_pool_close (pool, (char *)key, FALSE); } void statfile_pool_delete (statfile_pool_t *pool) { - g_hash_table_foreach (pool->files, pool_delete_callback, pool); + rspamd_hash_foreach (pool->files, pool_delete_callback, pool); memory_pool_delete (pool->pool); g_free (pool); } @@ -246,7 +247,7 @@ statfile_pool_lock_file (statfile_pool_t *pool, char *filename) { stat_file_t *file; - if ((file = g_hash_table_lookup (pool->files, filename)) == NULL) { + if ((file = rspamd_hash_lookup (pool->files, filename)) == NULL) { msg_info ("statfile_pool_lock_file: file %s is not opened", filename); return; } @@ -259,7 +260,7 @@ statfile_pool_unlock_file (statfile_pool_t *pool, char *filename) { stat_file_t *file; - if ((file = g_hash_table_lookup (pool->files, filename)) == NULL) { + if ((file = rspamd_hash_lookup (pool->files, filename)) == NULL) { msg_info ("statfile_pool_unlock_file: file %s is not opened", filename); return; } @@ -276,7 +277,7 @@ statfile_pool_get_block (statfile_pool_t *pool, char *filename, uint32_t h1, uin unsigned int i, blocknum; u_char *c; - if ((file = g_hash_table_lookup (pool->files, filename)) == NULL) { + if ((file = rspamd_hash_lookup (pool->files, filename)) == NULL) { msg_info ("statfile_pool_get_block: file %s is not opened", filename); return 0; } @@ -316,7 +317,7 @@ statfile_pool_set_block (statfile_pool_t *pool, char *filename, uint32_t h1, uin unsigned int i, blocknum, oldest = 0; u_char *c; - if ((file = g_hash_table_lookup (pool->files, filename)) == NULL) { + if ((file = rspamd_hash_lookup (pool->files, filename)) == NULL) { msg_info ("statfile_pool_set_block: file %s is not opened", filename); return; } @@ -378,5 +379,5 @@ statfile_pool_set_block (statfile_pool_t *pool, char *filename, uint32_t h1, uin int statfile_pool_is_open (statfile_pool_t *pool, char *filename) { - return g_hash_table_lookup (pool->files, filename) != NULL; + return rspamd_hash_lookup (pool->files, filename) != NULL; } diff --git a/src/statfile.h b/src/statfile.h index ea0dc9965..eac83ca01 100644 --- a/src/statfile.h +++ b/src/statfile.h @@ -13,6 +13,7 @@ #include <stdint.h> #endif #include "mem_pool.h" +#include "hash.h" #define CHAIN_LENGTH 128 @@ -48,7 +49,7 @@ typedef struct stat_file_s { } stat_file_t; typedef struct statfile_pool_s { - GHashTable *files; + rspamd_hash_t *files; int opened; size_t max; size_t occupied; @@ -58,7 +59,7 @@ typedef struct statfile_pool_s { statfile_pool_t* statfile_pool_new (size_t max_size); int statfile_pool_open (statfile_pool_t *pool, char *filename); int statfile_pool_create (statfile_pool_t *pool, char *filename, size_t len); -int statfile_pool_close (statfile_pool_t *pool, char *filename); +int statfile_pool_close (statfile_pool_t *pool, char *filename, gboolean remove_hash); void statfile_pool_delete (statfile_pool_t *pool); void statfile_pool_lock_file (statfile_pool_t *pool, char *filename); void statfile_pool_unlock_file (statfile_pool_t *pool, char *filename); @@ -4,9 +4,11 @@ #include <sys/types.h> #include <sys/socket.h> -#ifndef HAVE_OWN_QUEUE_H +#include "config.h" +#if !defined(HAVE_OWN_QUEUE_H) && defined(HAVE_SYS_QUEUE_H) #include <sys/queue.h> -#else +#endif +#ifdef HAVE_OWN_QUEUE_H #include "queue.h" #endif |