diff options
-rw-r--r-- | src/fuzzy_storage.c | 311 |
1 files changed, 53 insertions, 258 deletions
diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c index 46d4b3ff4..43dde2988 100644 --- a/src/fuzzy_storage.c +++ b/src/fuzzy_storage.c @@ -38,6 +38,7 @@ #include "bloom.h" #include "map.h" #include "fuzzy_storage.h" +#include <lmdb.h> /* This number is used as limit while comparing two fuzzy hashes, this value can vary from 0 to 100 */ #define LEV_LIMIT 99 @@ -75,8 +76,6 @@ worker_t fuzzy_worker = { SOCK_DGRAM /* UDP socket */ }; -static GQueue *hashes[BUCKETS]; -static GQueue *frequent; static GHashTable *static_hash; static rspamd_bloom_filter_t *bf; @@ -88,7 +87,6 @@ static struct event tev; static struct rspamd_stat *server_stat; struct rspamd_fuzzy_storage_ctx { - gboolean strict_hash; char *hashfile; gdouble expire; guint32 frequent_score; @@ -151,31 +149,15 @@ expire_nodes (gpointer *to_expire, gint expired_num, { gint i; struct rspamd_fuzzy_node *node; - GList *cur; - GQueue *head; for (i = 0; i < expired_num; i++) { - if (ctx->strict_hash) { - node = (struct rspamd_fuzzy_node *)to_expire[i]; - if (node->time != INVALID_NODE_TIME) { - server_stat->fuzzy_hashes_expired++; - } - server_stat->fuzzy_hashes--; - rspamd_bloom_del (bf, node->h.hash_pipe); - g_hash_table_remove (static_hash, &node->h); - } - else { - cur = (GList *)to_expire[i]; - node = (struct rspamd_fuzzy_node *)cur->data; - head = hashes[node->h.block_size % BUCKETS]; - g_queue_delete_link (head, cur); - rspamd_bloom_del (bf, node->h.hash_pipe); - if (node->time != INVALID_NODE_TIME) { - server_stat->fuzzy_hashes_expired++; - } - server_stat->fuzzy_hashes--; - g_slice_free1 (sizeof(struct rspamd_fuzzy_node), node); + node = (struct rspamd_fuzzy_node *)to_expire[i]; + if (node->time != INVALID_NODE_TIME) { + server_stat->fuzzy_hashes_expired++; } + server_stat->fuzzy_hashes--; + rspamd_bloom_del (bf, node->h.hash_pipe); + g_hash_table_remove (static_hash, &node->h); } } @@ -184,9 +166,8 @@ sync_cache (gpointer ud) { static const int max_expired = 8192; struct rspamd_worker *wrk = ud; - gint fd, i, expired_num = 0; + gint fd, expired_num = 0; gchar *filename, header[4]; - GList *cur; struct rspamd_fuzzy_node *node; gpointer *nodes_expired = NULL; guint64 expire, now; @@ -241,70 +222,29 @@ sync_cache (gpointer ud) goto end; } - if (ctx->strict_hash) { - rspamd_rwlock_reader_lock (ctx->tree_lock); - g_hash_table_iter_init (&iter, static_hash); + rspamd_rwlock_reader_lock (ctx->tree_lock); + g_hash_table_iter_init (&iter, static_hash); - while (g_hash_table_iter_next (&iter, NULL, (void **)&node)) { - if (node->time == INVALID_NODE_TIME || now - node->time > - expire) { - if (nodes_expired == NULL) { - nodes_expired = g_malloc ( + while (g_hash_table_iter_next (&iter, NULL, (void **)&node)) { + if (node->time == INVALID_NODE_TIME || now - node->time > + expire) { + if (nodes_expired == NULL) { + nodes_expired = g_malloc ( max_expired * sizeof (gpointer)); - } - - if (expired_num < max_expired) { - nodes_expired[expired_num++] = node; - } - continue; } - if (write (fd, node, sizeof (struct rspamd_fuzzy_node)) == -1) { - msg_err ("cannot write file %s: %s", filename, - strerror (errno)); - goto end; + + if (expired_num < max_expired) { + nodes_expired[expired_num++] = node; } + continue; } - rspamd_rwlock_reader_unlock (ctx->tree_lock); - } - else { - rspamd_rwlock_reader_lock (ctx->tree_lock); - cur = frequent->head; - while (cur) { - node = cur->data; - if (write (fd, node, sizeof(struct rspamd_fuzzy_node)) == -1) { - msg_err ("cannot write file %s: %s", filename, + if (write (fd, node, sizeof (struct rspamd_fuzzy_node)) == -1) { + msg_err ("cannot write file %s: %s", filename, strerror (errno)); - } - cur = g_list_next (cur); + goto end; } - for (i = 0; i < BUCKETS; i++) { - cur = hashes[i]->head; - while (cur) { - node = cur->data; - if (now - node->time > expire) { - if (nodes_expired == NULL) { - nodes_expired = - g_malloc (max_expired * sizeof (gpointer)); - } - - if (expired_num < max_expired) { - nodes_expired[expired_num++] = cur; - } - cur = g_list_next (cur); - continue; - } - if (write (fd, node, - sizeof(struct rspamd_fuzzy_node)) == -1) { - msg_err ( - "cannot write file %s: %s", filename, - strerror (errno)); - goto end; - } - cur = g_list_next (cur); - } - } - rspamd_rwlock_reader_unlock (ctx->tree_lock); } + rspamd_rwlock_reader_unlock (ctx->tree_lock); /* Now try to expire some nodes */ if (expired_num > 0) { @@ -361,7 +301,7 @@ sigusr2_handler (void *arg) static gboolean read_hashes_file (struct rspamd_worker *wrk) { - gint r, fd, i, version = 0; + gint r, fd, version = 0; struct stat st; gchar *filename, header[4]; gboolean touch_stat = TRUE; @@ -436,31 +376,13 @@ read_hashes_file (struct rspamd_worker *wrk) break; } } - if (ctx->strict_hash) { - g_hash_table_insert (static_hash, &node->h, node); - } - else { - if (node->value > (gint)ctx->frequent_score) { - g_queue_push_head (frequent, node); - } - else { - g_queue_push_head (hashes[node->h.block_size % BUCKETS], node); - } - } + g_hash_table_insert (static_hash, &node->h, node); rspamd_bloom_add (bf, node->h.hash_pipe); if (touch_stat) { server_stat->fuzzy_hashes++; } } - if (!ctx->strict_hash) { - /* Sort everything */ - g_queue_sort (frequent, compare_nodes, NULL); - for (i = 0; i < BUCKETS; i++) { - g_queue_sort (hashes[i], compare_nodes, NULL); - } - } - (void)rspamd_file_unlock (fd, FALSE); close (fd); @@ -480,79 +402,25 @@ static inline struct rspamd_fuzzy_node * check_hash_node (GQueue *hash, rspamd_fuzzy_t *s, gint update_value, guint64 time, struct rspamd_fuzzy_storage_ctx *ctx) { - GList *cur; struct rspamd_fuzzy_node *h; - gint prob = 0; - if (ctx->strict_hash) { - h = g_hash_table_lookup (static_hash, s); - if (h != NULL) { - if (h->time == INVALID_NODE_TIME) { - /* Node is expired */ - return NULL; - } - else if (update_value == 0 && time - h->time > ctx->expire) { - h->time = INVALID_NODE_TIME; - server_stat->fuzzy_hashes_expired++; - return NULL; - } - else if (h->h.block_size== s->block_size) { - msg_debug ("fuzzy hash was found in tree"); - if (update_value) { - h->value += update_value; - } - return h; + h = g_hash_table_lookup (static_hash, s); + if (h != NULL) { + if (h->time == INVALID_NODE_TIME) { + /* Node is expired */ + return NULL; + } + else if (update_value == 0 && time - h->time > ctx->expire) { + h->time = INVALID_NODE_TIME; + server_stat->fuzzy_hashes_expired++; + return NULL; + } + else if (h->h.block_size== s->block_size) { + msg_debug ("fuzzy hash was found in tree"); + if (update_value) { + h->value += update_value; } - } - } - else { - cur = frequent->head; - while (cur) { - h = cur->data; - if ((prob = rspamd_fuzzy_compare (&h->h, s)) > LEV_LIMIT) { - msg_info ("fuzzy hash was found, probability %d%%", prob); - if (h->time == INVALID_NODE_TIME) { - return NULL; - } - else if (update_value) { - msg_info ("new hash weight: %d", h->value); - h->value += update_value; - } - else if (time - h->time > ctx->expire) { - h->time = INVALID_NODE_TIME; - server_stat->fuzzy_hashes_expired++; - return NULL; - } - return h; - } - cur = g_list_next (cur); - } - - cur = hash->head; - while (cur) { - h = cur->data; - if ((prob = rspamd_fuzzy_compare (&h->h, s)) > LEV_LIMIT) { - msg_info ("fuzzy hash was found, probability %d%%", prob); - if (h->time == INVALID_NODE_TIME) { - return NULL; - } - else if (update_value) { - msg_info ("new hash weight: %d", h->value); - h->value += update_value; - } - else if (time - h->time > ctx->expire) { - h->time = INVALID_NODE_TIME; - server_stat->fuzzy_hashes_expired++; - return NULL; - } - if (h->value > (gint)ctx->frequent_score) { - g_queue_unlink (hash, cur); - g_queue_push_head_link (frequent, cur); - msg_info ("moved hash to frequent list"); - } - return h; - } - cur = g_list_next (cur); + return h; } } @@ -577,14 +445,7 @@ process_check_command (struct fuzzy_cmd *cmd, s.block_size = cmd->blocksize; rspamd_rwlock_reader_lock (ctx->tree_lock); - if (ctx->strict_hash) { - h = check_hash_node (NULL, &s, 0, time, ctx); - } - else { - h = - check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, 0, time, - ctx); - } + h = check_hash_node (NULL, &s, 0, time, ctx); rspamd_rwlock_reader_unlock (ctx->tree_lock); if (h == NULL) { @@ -609,12 +470,7 @@ add_hash_node (struct fuzzy_cmd *cmd, h->time = time; h->value = cmd->value; h->flag = cmd->flag; - if (ctx->strict_hash) { - g_hash_table_insert (static_hash, &h->h, h); - } - else { - g_queue_push_head (hashes[cmd->blocksize % BUCKETS], h); - } + g_hash_table_insert (static_hash, &h->h, h); rspamd_bloom_add (bf, cmd->hash); return h; @@ -633,16 +489,7 @@ update_hash (struct fuzzy_cmd *cmd, mods++; rspamd_rwlock_writer_lock (ctx->tree_lock); - if (ctx->strict_hash) { - n = check_hash_node (NULL, &s, cmd->value, time, ctx); - } - else { - n = check_hash_node (hashes[cmd->blocksize % BUCKETS], - &s, - cmd->value, - time, - ctx); - } + n = check_hash_node (NULL, &s, cmd->value, time, ctx); if (n == NULL) { /* Bloom false positive */ n = add_hash_node (cmd, time, ctx); @@ -682,43 +529,16 @@ static gboolean delete_hash (GQueue *hash, rspamd_fuzzy_t *s, struct rspamd_fuzzy_storage_ctx *ctx) { - GList *cur, *tmp; - struct rspamd_fuzzy_node *h; gboolean res = FALSE; - if (ctx->strict_hash) { - rspamd_rwlock_writer_lock (ctx->tree_lock); - if (g_hash_table_remove (static_hash, s)) { - rspamd_bloom_del (bf, s->hash_pipe); - msg_info ("fuzzy hash was successfully deleted"); - server_stat->fuzzy_hashes--; - mods++; - } - rspamd_rwlock_writer_unlock (ctx->tree_lock); - } - else { - rspamd_rwlock_writer_lock (ctx->tree_lock); - cur = hash->head; - - /* XXX: too slow way */ - while (cur) { - h = cur->data; - if (rspamd_fuzzy_compare (&h->h, s) > LEV_LIMIT) { - g_slice_free1 (sizeof (struct rspamd_fuzzy_node), h); - tmp = cur; - cur = g_list_next (cur); - g_queue_delete_link (hash, tmp); - rspamd_bloom_del (bf, s->hash_pipe); - msg_info ("fuzzy hash was successfully deleted"); - server_stat->fuzzy_hashes--; - mods++; - res = TRUE; - continue; - } - cur = g_list_next (cur); - } - rspamd_rwlock_writer_unlock (ctx->tree_lock); + rspamd_rwlock_writer_lock (ctx->tree_lock); + if (g_hash_table_remove (static_hash, s)) { + rspamd_bloom_del (bf, s->hash_pipe); + msg_info ("fuzzy hash was successfully deleted"); + server_stat->fuzzy_hashes--; + mods++; } + rspamd_rwlock_writer_unlock (ctx->tree_lock); return res; @@ -730,7 +550,6 @@ process_delete_command (struct fuzzy_cmd *cmd, struct rspamd_fuzzy_storage_ctx *ctx) { rspamd_fuzzy_t s; - gboolean res = FALSE; if (!rspamd_bloom_check (bf, cmd->hash)) { return FALSE; @@ -738,20 +557,8 @@ process_delete_command (struct fuzzy_cmd *cmd, memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe)); s.block_size = cmd->blocksize; - if (ctx->strict_hash) { - return delete_hash (NULL, &s, ctx); - } - else { - res = delete_hash (frequent, &s, ctx); - if (!res) { - res = delete_hash (hashes[cmd->blocksize % BUCKETS], &s, ctx); - } - else { - (void)delete_hash (hashes[cmd->blocksize % BUCKETS], &s, ctx); - } - } - return res; + return delete_hash (NULL, &s, ctx); } /** @@ -977,9 +784,6 @@ init_fuzzy (struct rspamd_config *cfg) G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx, expire), RSPAMD_CL_FLAG_TIME_FLOAT); - rspamd_rcl_register_worker_option (cfg, type, "strict_hash", - rspamd_rcl_parse_struct_boolean, ctx, - G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx, strict_hash), 0); rspamd_rcl_register_worker_option (cfg, type, "allow_update", rspamd_rcl_parse_struct_string, ctx, @@ -998,7 +802,6 @@ start_fuzzy (struct rspamd_worker *worker) struct rspamd_fuzzy_storage_ctx *ctx = worker->ctx; GError *err = NULL; struct rspamd_worker_signal_handler *sigh; - gint i; ctx->ev_base = rspamd_prepare_worker (worker, "fuzzy", @@ -1021,16 +824,8 @@ start_fuzzy (struct rspamd_worker *worker) sigh->post_handler = sigterm_handler; sigh->handler_data = worker; - if (ctx->strict_hash) { - static_hash = g_hash_table_new_full (rspamd_fuzzy_hash, rspamd_fuzzy_equal, + static_hash = g_hash_table_new_full (rspamd_fuzzy_hash, rspamd_fuzzy_equal, NULL, rspamd_fuzzy_free_node); - } - else { - for (i = 0; i < BUCKETS; i++) { - hashes[i] = g_queue_new (); - } - frequent = g_queue_new (); - } /* Init bloom filter */ bf = rspamd_bloom_create (2000000L, RSPAMD_DEFAULT_BLOOM_HASHES); |