]> source.dussan.org Git - rspamd.git/commitdiff
Remove old lists mode.
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 17 Dec 2014 21:47:25 +0000 (21:47 +0000)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 17 Dec 2014 21:47:25 +0000 (21:47 +0000)
src/fuzzy_storage.c

index 46d4b3ff4f002367448d05a08590559712994268..43dde29886f66937bf0017e65f6a41b4753d2d77 100644 (file)
@@ -38,6 +38,7 @@
 #include "bloom.h"
 #include "map.h"
 #include "fuzzy_storage.h"
+#include <lmdb.h>
 
 /* This number is used as limit while comparing two fuzzy hashes, this value can vary from 0 to 100 */
 #define LEV_LIMIT 99
@@ -75,8 +76,6 @@ worker_t fuzzy_worker = {
        SOCK_DGRAM                  /* UDP socket */
 };
 
-static GQueue *hashes[BUCKETS];
-static GQueue *frequent;
 static GHashTable *static_hash;
 static rspamd_bloom_filter_t *bf;
 
@@ -88,7 +87,6 @@ static struct event tev;
 static struct rspamd_stat *server_stat;
 
 struct rspamd_fuzzy_storage_ctx {
-       gboolean strict_hash;
        char *hashfile;
        gdouble expire;
        guint32 frequent_score;
@@ -151,31 +149,15 @@ expire_nodes (gpointer *to_expire, gint expired_num,
 {
        gint i;
        struct rspamd_fuzzy_node *node;
-       GList *cur;
-       GQueue *head;
 
        for (i = 0; i < expired_num; i++) {
-               if (ctx->strict_hash) {
-                       node = (struct rspamd_fuzzy_node *)to_expire[i];
-                       if (node->time != INVALID_NODE_TIME) {
-                               server_stat->fuzzy_hashes_expired++;
-                       }
-                       server_stat->fuzzy_hashes--;
-                       rspamd_bloom_del (bf, node->h.hash_pipe);
-                       g_hash_table_remove (static_hash, &node->h);
-               }
-               else {
-                       cur = (GList *)to_expire[i];
-                       node = (struct rspamd_fuzzy_node *)cur->data;
-                       head = hashes[node->h.block_size % BUCKETS];
-                       g_queue_delete_link (head, cur);
-                       rspamd_bloom_del (bf, node->h.hash_pipe);
-                       if (node->time != INVALID_NODE_TIME) {
-                               server_stat->fuzzy_hashes_expired++;
-                       }
-                       server_stat->fuzzy_hashes--;
-                       g_slice_free1 (sizeof(struct rspamd_fuzzy_node), node);
+               node = (struct rspamd_fuzzy_node *)to_expire[i];
+               if (node->time != INVALID_NODE_TIME) {
+                       server_stat->fuzzy_hashes_expired++;
                }
+               server_stat->fuzzy_hashes--;
+               rspamd_bloom_del (bf, node->h.hash_pipe);
+               g_hash_table_remove (static_hash, &node->h);
        }
 }
 
@@ -184,9 +166,8 @@ sync_cache (gpointer ud)
 {
        static const int max_expired = 8192;
        struct rspamd_worker *wrk = ud;
-       gint fd, i, expired_num = 0;
+       gint fd, expired_num = 0;
        gchar *filename, header[4];
-       GList *cur;
        struct rspamd_fuzzy_node *node;
        gpointer *nodes_expired = NULL;
        guint64 expire, now;
@@ -241,70 +222,29 @@ sync_cache (gpointer ud)
                        goto end;
                }
 
-               if (ctx->strict_hash) {
-                       rspamd_rwlock_reader_lock (ctx->tree_lock);
-                       g_hash_table_iter_init (&iter, static_hash);
+               rspamd_rwlock_reader_lock (ctx->tree_lock);
+               g_hash_table_iter_init (&iter, static_hash);
 
-                       while (g_hash_table_iter_next (&iter, NULL, (void **)&node)) {
-                               if (node->time == INVALID_NODE_TIME || now - node->time >
-                                       expire) {
-                                       if (nodes_expired == NULL) {
-                                               nodes_expired = g_malloc (
+               while (g_hash_table_iter_next (&iter, NULL, (void **)&node)) {
+                       if (node->time == INVALID_NODE_TIME || now - node->time >
+               expire) {
+                               if (nodes_expired == NULL) {
+                                       nodes_expired = g_malloc (
                                                        max_expired * sizeof (gpointer));
-                                       }
-
-                                       if (expired_num < max_expired) {
-                                               nodes_expired[expired_num++] = node;
-                                       }
-                                       continue;
                                }
-                               if (write (fd, node, sizeof (struct rspamd_fuzzy_node)) == -1) {
-                                       msg_err ("cannot write file %s: %s", filename,
-                                               strerror (errno));
-                                       goto end;
+
+                               if (expired_num < max_expired) {
+                                       nodes_expired[expired_num++] = node;
                                }
+                               continue;
                        }
-                       rspamd_rwlock_reader_unlock (ctx->tree_lock);
-               }
-               else {
-                       rspamd_rwlock_reader_lock (ctx->tree_lock);
-                       cur = frequent->head;
-                       while (cur) {
-                               node = cur->data;
-                               if (write (fd, node, sizeof(struct rspamd_fuzzy_node)) == -1) {
-                                       msg_err ("cannot write file %s: %s", filename,
+                       if (write (fd, node, sizeof (struct rspamd_fuzzy_node)) == -1) {
+                               msg_err ("cannot write file %s: %s", filename,
                                                strerror (errno));
-                               }
-                               cur = g_list_next (cur);
+                               goto end;
                        }
-                       for (i = 0; i < BUCKETS; i++) {
-                               cur = hashes[i]->head;
-                               while (cur) {
-                                       node = cur->data;
-                                       if (now - node->time > expire) {
-                                               if (nodes_expired == NULL) {
-                                                       nodes_expired =
-                                                               g_malloc (max_expired * sizeof (gpointer));
-                                               }
-
-                                               if (expired_num < max_expired) {
-                                                       nodes_expired[expired_num++] = cur;
-                                               }
-                                               cur = g_list_next (cur);
-                                               continue;
-                                       }
-                                       if (write (fd, node,
-                                               sizeof(struct rspamd_fuzzy_node)) == -1) {
-                                               msg_err (
-                                                       "cannot write file %s: %s", filename,
-                                                       strerror (errno));
-                                               goto end;
-                                       }
-                                       cur = g_list_next (cur);
-                               }
-                       }
-                       rspamd_rwlock_reader_unlock (ctx->tree_lock);
                }
+               rspamd_rwlock_reader_unlock (ctx->tree_lock);
 
                /* Now try to expire some nodes */
                if (expired_num > 0) {
@@ -361,7 +301,7 @@ sigusr2_handler (void *arg)
 static gboolean
 read_hashes_file (struct rspamd_worker *wrk)
 {
-       gint r, fd, i, version = 0;
+       gint r, fd, version = 0;
        struct stat st;
        gchar *filename, header[4];
        gboolean touch_stat = TRUE;
@@ -436,31 +376,13 @@ read_hashes_file (struct rspamd_worker *wrk)
                                break;
                        }
                }
-               if (ctx->strict_hash) {
-                       g_hash_table_insert (static_hash, &node->h, node);
-               }
-               else {
-                       if (node->value > (gint)ctx->frequent_score) {
-                               g_queue_push_head (frequent, node);
-                       }
-                       else {
-                               g_queue_push_head (hashes[node->h.block_size % BUCKETS], node);
-                       }
-               }
+               g_hash_table_insert (static_hash, &node->h, node);
                rspamd_bloom_add (bf, node->h.hash_pipe);
                if (touch_stat) {
                        server_stat->fuzzy_hashes++;
                }
        }
 
-       if (!ctx->strict_hash) {
-               /* Sort everything */
-               g_queue_sort (frequent, compare_nodes, NULL);
-               for (i = 0; i < BUCKETS; i++) {
-                       g_queue_sort (hashes[i], compare_nodes, NULL);
-               }
-       }
-
        (void)rspamd_file_unlock (fd, FALSE);
        close (fd);
 
@@ -480,79 +402,25 @@ static inline struct rspamd_fuzzy_node *
 check_hash_node (GQueue *hash, rspamd_fuzzy_t *s, gint update_value,
        guint64 time, struct rspamd_fuzzy_storage_ctx *ctx)
 {
-       GList *cur;
        struct rspamd_fuzzy_node *h;
-       gint prob = 0;
 
-       if (ctx->strict_hash) {
-               h = g_hash_table_lookup (static_hash, s);
-               if (h != NULL) {
-                       if (h->time == INVALID_NODE_TIME) {
-                               /* Node is expired */
-                               return NULL;
-                       }
-                       else if (update_value == 0 && time - h->time > ctx->expire) {
-                               h->time = INVALID_NODE_TIME;
-                               server_stat->fuzzy_hashes_expired++;
-                               return NULL;
-                       }
-                       else if (h->h.block_size== s->block_size) {
-                               msg_debug ("fuzzy hash was found in tree");
-                               if (update_value) {
-                                       h->value += update_value;
-                               }
-                               return h;
+       h = g_hash_table_lookup (static_hash, s);
+       if (h != NULL) {
+               if (h->time == INVALID_NODE_TIME) {
+                       /* Node is expired */
+                       return NULL;
+               }
+               else if (update_value == 0 && time - h->time > ctx->expire) {
+                       h->time = INVALID_NODE_TIME;
+                       server_stat->fuzzy_hashes_expired++;
+                       return NULL;
+               }
+               else if (h->h.block_size== s->block_size) {
+                       msg_debug ("fuzzy hash was found in tree");
+                       if (update_value) {
+                               h->value += update_value;
                        }
-               }
-       }
-       else {
-               cur = frequent->head;
-               while (cur) {
-                       h = cur->data;
-                       if ((prob = rspamd_fuzzy_compare (&h->h, s)) > LEV_LIMIT) {
-                               msg_info ("fuzzy hash was found, probability %d%%", prob);
-                               if (h->time == INVALID_NODE_TIME) {
-                                       return NULL;
-                               }
-                               else if (update_value) {
-                                       msg_info ("new hash weight: %d", h->value);
-                                       h->value += update_value;
-                               }
-                               else if (time - h->time > ctx->expire) {
-                                       h->time = INVALID_NODE_TIME;
-                                       server_stat->fuzzy_hashes_expired++;
-                                       return NULL;
-                               }
-                               return h;
-                       }
-                       cur = g_list_next (cur);
-               }
-
-               cur = hash->head;
-               while (cur) {
-                       h = cur->data;
-                       if ((prob = rspamd_fuzzy_compare (&h->h, s)) > LEV_LIMIT) {
-                               msg_info ("fuzzy hash was found, probability %d%%", prob);
-                               if (h->time == INVALID_NODE_TIME) {
-                                       return NULL;
-                               }
-                               else if (update_value) {
-                                       msg_info ("new hash weight: %d", h->value);
-                                       h->value += update_value;
-                               }
-                               else if (time - h->time > ctx->expire) {
-                                       h->time = INVALID_NODE_TIME;
-                                       server_stat->fuzzy_hashes_expired++;
-                                       return NULL;
-                               }
-                               if (h->value > (gint)ctx->frequent_score) {
-                                       g_queue_unlink (hash, cur);
-                                       g_queue_push_head_link (frequent, cur);
-                                       msg_info ("moved hash to frequent list");
-                               }
-                               return h;
-                       }
-                       cur = g_list_next (cur);
+                       return h;
                }
        }
 
@@ -577,14 +445,7 @@ process_check_command (struct fuzzy_cmd *cmd,
        s.block_size = cmd->blocksize;
 
        rspamd_rwlock_reader_lock (ctx->tree_lock);
-       if (ctx->strict_hash) {
-               h = check_hash_node (NULL, &s, 0, time, ctx);
-       }
-       else {
-               h =
-                       check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, 0, time,
-                               ctx);
-       }
+       h = check_hash_node (NULL, &s, 0, time, ctx);
        rspamd_rwlock_reader_unlock (ctx->tree_lock);
 
        if (h == NULL) {
@@ -609,12 +470,7 @@ add_hash_node (struct fuzzy_cmd *cmd,
        h->time = time;
        h->value = cmd->value;
        h->flag = cmd->flag;
-       if (ctx->strict_hash) {
-               g_hash_table_insert (static_hash, &h->h, h);
-       }
-       else {
-               g_queue_push_head (hashes[cmd->blocksize % BUCKETS], h);
-       }
+       g_hash_table_insert (static_hash, &h->h, h);
        rspamd_bloom_add (bf, cmd->hash);
 
        return h;
@@ -633,16 +489,7 @@ update_hash (struct fuzzy_cmd *cmd,
        mods++;
 
        rspamd_rwlock_writer_lock (ctx->tree_lock);
-       if (ctx->strict_hash) {
-               n = check_hash_node (NULL, &s, cmd->value, time, ctx);
-       }
-       else {
-               n = check_hash_node (hashes[cmd->blocksize % BUCKETS],
-                               &s,
-                               cmd->value,
-                               time,
-                               ctx);
-       }
+       n = check_hash_node (NULL, &s, cmd->value, time, ctx);
        if (n == NULL) {
                /* Bloom false positive */
                n = add_hash_node (cmd, time, ctx);
@@ -682,43 +529,16 @@ static gboolean
 delete_hash (GQueue *hash, rspamd_fuzzy_t *s,
        struct rspamd_fuzzy_storage_ctx *ctx)
 {
-       GList *cur, *tmp;
-       struct rspamd_fuzzy_node *h;
        gboolean res = FALSE;
 
-       if (ctx->strict_hash) {
-               rspamd_rwlock_writer_lock (ctx->tree_lock);
-               if (g_hash_table_remove (static_hash, s)) {
-                       rspamd_bloom_del (bf, s->hash_pipe);
-                       msg_info ("fuzzy hash was successfully deleted");
-                       server_stat->fuzzy_hashes--;
-                       mods++;
-               }
-               rspamd_rwlock_writer_unlock (ctx->tree_lock);
-       }
-       else {
-               rspamd_rwlock_writer_lock (ctx->tree_lock);
-               cur = hash->head;
-
-               /* XXX: too slow way */
-               while (cur) {
-                       h = cur->data;
-                       if (rspamd_fuzzy_compare (&h->h, s) > LEV_LIMIT) {
-                               g_slice_free1 (sizeof (struct rspamd_fuzzy_node), h);
-                               tmp = cur;
-                               cur = g_list_next (cur);
-                               g_queue_delete_link (hash, tmp);
-                               rspamd_bloom_del (bf, s->hash_pipe);
-                               msg_info ("fuzzy hash was successfully deleted");
-                               server_stat->fuzzy_hashes--;
-                               mods++;
-                               res = TRUE;
-                               continue;
-                       }
-                       cur = g_list_next (cur);
-               }
-               rspamd_rwlock_writer_unlock (ctx->tree_lock);
+       rspamd_rwlock_writer_lock (ctx->tree_lock);
+       if (g_hash_table_remove (static_hash, s)) {
+               rspamd_bloom_del (bf, s->hash_pipe);
+               msg_info ("fuzzy hash was successfully deleted");
+               server_stat->fuzzy_hashes--;
+               mods++;
        }
+       rspamd_rwlock_writer_unlock (ctx->tree_lock);
 
        return res;
 
@@ -730,7 +550,6 @@ process_delete_command (struct fuzzy_cmd *cmd,
        struct rspamd_fuzzy_storage_ctx *ctx)
 {
        rspamd_fuzzy_t s;
-       gboolean res = FALSE;
 
        if (!rspamd_bloom_check (bf, cmd->hash)) {
                return FALSE;
@@ -738,20 +557,8 @@ process_delete_command (struct fuzzy_cmd *cmd,
 
        memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe));
        s.block_size = cmd->blocksize;
-       if (ctx->strict_hash) {
-               return delete_hash (NULL, &s, ctx);
-       }
-       else {
-               res = delete_hash (frequent, &s, ctx);
-               if (!res) {
-                       res = delete_hash (hashes[cmd->blocksize % BUCKETS], &s, ctx);
-               }
-               else {
-                       (void)delete_hash (hashes[cmd->blocksize % BUCKETS], &s, ctx);
-               }
-       }
 
-       return res;
+       return delete_hash (NULL, &s, ctx);
 }
 
 /**
@@ -977,9 +784,6 @@ init_fuzzy (struct rspamd_config *cfg)
                G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx,
                expire), RSPAMD_CL_FLAG_TIME_FLOAT);
 
-       rspamd_rcl_register_worker_option (cfg, type, "strict_hash",
-               rspamd_rcl_parse_struct_boolean, ctx,
-               G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx, strict_hash), 0);
 
        rspamd_rcl_register_worker_option (cfg, type, "allow_update",
                rspamd_rcl_parse_struct_string, ctx,
@@ -998,7 +802,6 @@ start_fuzzy (struct rspamd_worker *worker)
        struct rspamd_fuzzy_storage_ctx *ctx = worker->ctx;
        GError *err = NULL;
        struct rspamd_worker_signal_handler *sigh;
-       gint i;
 
        ctx->ev_base = rspamd_prepare_worker (worker,
                        "fuzzy",
@@ -1021,16 +824,8 @@ start_fuzzy (struct rspamd_worker *worker)
        sigh->post_handler = sigterm_handler;
        sigh->handler_data = worker;
 
-       if (ctx->strict_hash) {
-               static_hash = g_hash_table_new_full (rspamd_fuzzy_hash, rspamd_fuzzy_equal,
+       static_hash = g_hash_table_new_full (rspamd_fuzzy_hash, rspamd_fuzzy_equal,
                                NULL, rspamd_fuzzy_free_node);
-       }
-       else {
-               for (i = 0; i < BUCKETS; i++) {
-                       hashes[i] = g_queue_new ();
-               }
-               frequent = g_queue_new ();
-       }
 
        /* Init bloom filter */
        bf = rspamd_bloom_create (2000000L, RSPAMD_DEFAULT_BLOOM_HASHES);