summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/fuzzy_storage.c 311
1 files changed, 53 insertions, 258 deletions
diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c
index 46d4b3ff4..43dde2988 100644
--- a/src/fuzzy_storage.c
+++ b/src/fuzzy_storage.c
@@ -38,6 +38,7 @@
#include "bloom.h"
#include "map.h"
#include "fuzzy_storage.h"
+#include <lmdb.h>
/* This number is used as limit while comparing two fuzzy hashes, this value can vary from 0 to 100 */
#define LEV_LIMIT 99
@@ -75,8 +76,6 @@ worker_t fuzzy_worker = {
SOCK_DGRAM /* UDP socket */
};
-static GQueue *hashes[BUCKETS];
-static GQueue *frequent;
static GHashTable *static_hash;
static rspamd_bloom_filter_t *bf;
@@ -88,7 +87,6 @@ static struct event tev;
static struct rspamd_stat *server_stat;
struct rspamd_fuzzy_storage_ctx {
- gboolean strict_hash;
char *hashfile;
gdouble expire;
guint32 frequent_score;
@@ -151,31 +149,15 @@ expire_nodes (gpointer *to_expire, gint expired_num,
{
gint i;
struct rspamd_fuzzy_node *node;
- GList *cur;
- GQueue *head;
for (i = 0; i < expired_num; i++) {
- if (ctx->strict_hash) {
- node = (struct rspamd_fuzzy_node *)to_expire[i];
- if (node->time != INVALID_NODE_TIME) {
- server_stat->fuzzy_hashes_expired++;
- }
- server_stat->fuzzy_hashes--;
- rspamd_bloom_del (bf, node->h.hash_pipe);
- g_hash_table_remove (static_hash, &node->h);
- }
- else {
- cur = (GList *)to_expire[i];
- node = (struct rspamd_fuzzy_node *)cur->data;
- head = hashes[node->h.block_size % BUCKETS];
- g_queue_delete_link (head, cur);
- rspamd_bloom_del (bf, node->h.hash_pipe);
- if (node->time != INVALID_NODE_TIME) {
- server_stat->fuzzy_hashes_expired++;
- }
- server_stat->fuzzy_hashes--;
- g_slice_free1 (sizeof(struct rspamd_fuzzy_node), node);
+ node = (struct rspamd_fuzzy_node *)to_expire[i];
+ if (node->time != INVALID_NODE_TIME) {
+ server_stat->fuzzy_hashes_expired++;
}
+ server_stat->fuzzy_hashes--;
+ rspamd_bloom_del (bf, node->h.hash_pipe);
+ g_hash_table_remove (static_hash, &node->h);
}
}
@@ -184,9 +166,8 @@ sync_cache (gpointer ud)
{
static const int max_expired = 8192;
struct rspamd_worker *wrk = ud;
- gint fd, i, expired_num = 0;
+ gint fd, expired_num = 0;
gchar *filename, header[4];
- GList *cur;
struct rspamd_fuzzy_node *node;
gpointer *nodes_expired = NULL;
guint64 expire, now;
@@ -241,70 +222,29 @@ sync_cache (gpointer ud)
goto end;
}
- if (ctx->strict_hash) {
- rspamd_rwlock_reader_lock (ctx->tree_lock);
- g_hash_table_iter_init (&iter, static_hash);
+ rspamd_rwlock_reader_lock (ctx->tree_lock);
+ g_hash_table_iter_init (&iter, static_hash);
- while (g_hash_table_iter_next (&iter, NULL, (void **)&node)) {
- if (node->time == INVALID_NODE_TIME || now - node->time >
- expire) {
- if (nodes_expired == NULL) {
- nodes_expired = g_malloc (
+ while (g_hash_table_iter_next (&iter, NULL, (void **)&node)) {
+ if (node->time == INVALID_NODE_TIME || now - node->time >
+ expire) {
+ if (nodes_expired == NULL) {
+ nodes_expired = g_malloc (
max_expired * sizeof (gpointer));
- }
-
- if (expired_num < max_expired) {
- nodes_expired[expired_num++] = node;
- }
- continue;
}
- if (write (fd, node, sizeof (struct rspamd_fuzzy_node)) == -1) {
- msg_err ("cannot write file %s: %s", filename,
- strerror (errno));
- goto end;
+
+ if (expired_num < max_expired) {
+ nodes_expired[expired_num++] = node;
}
+ continue;
}
- rspamd_rwlock_reader_unlock (ctx->tree_lock);
- }
- else {
- rspamd_rwlock_reader_lock (ctx->tree_lock);
- cur = frequent->head;
- while (cur) {
- node = cur->data;
- if (write (fd, node, sizeof(struct rspamd_fuzzy_node)) == -1) {
- msg_err ("cannot write file %s: %s", filename,
+ if (write (fd, node, sizeof (struct rspamd_fuzzy_node)) == -1) {
+ msg_err ("cannot write file %s: %s", filename,
strerror (errno));
- }
- cur = g_list_next (cur);
+ goto end;
}
- for (i = 0; i < BUCKETS; i++) {
- cur = hashes[i]->head;
- while (cur) {
- node = cur->data;
- if (now - node->time > expire) {
- if (nodes_expired == NULL) {
- nodes_expired =
- g_malloc (max_expired * sizeof (gpointer));
- }
-
- if (expired_num < max_expired) {
- nodes_expired[expired_num++] = cur;
- }
- cur = g_list_next (cur);
- continue;
- }
- if (write (fd, node,
- sizeof(struct rspamd_fuzzy_node)) == -1) {
- msg_err (
- "cannot write file %s: %s", filename,
- strerror (errno));
- goto end;
- }
- cur = g_list_next (cur);
- }
- }
- rspamd_rwlock_reader_unlock (ctx->tree_lock);
}
+ rspamd_rwlock_reader_unlock (ctx->tree_lock);
/* Now try to expire some nodes */
if (expired_num > 0) {
@@ -361,7 +301,7 @@ sigusr2_handler (void *arg)
static gboolean
read_hashes_file (struct rspamd_worker *wrk)
{
- gint r, fd, i, version = 0;
+ gint r, fd, version = 0;
struct stat st;
gchar *filename, header[4];
gboolean touch_stat = TRUE;
@@ -436,31 +376,13 @@ read_hashes_file (struct rspamd_worker *wrk)
break;
}
}
- if (ctx->strict_hash) {
- g_hash_table_insert (static_hash, &node->h, node);
- }
- else {
- if (node->value > (gint)ctx->frequent_score) {
- g_queue_push_head (frequent, node);
- }
- else {
- g_queue_push_head (hashes[node->h.block_size % BUCKETS], node);
- }
- }
+ g_hash_table_insert (static_hash, &node->h, node);
rspamd_bloom_add (bf, node->h.hash_pipe);
if (touch_stat) {
server_stat->fuzzy_hashes++;
}
}
- if (!ctx->strict_hash) {
- /* Sort everything */
- g_queue_sort (frequent, compare_nodes, NULL);
- for (i = 0; i < BUCKETS; i++) {
- g_queue_sort (hashes[i], compare_nodes, NULL);
- }
- }
-
(void)rspamd_file_unlock (fd, FALSE);
close (fd);
@@ -480,79 +402,25 @@ static inline struct rspamd_fuzzy_node *
check_hash_node (GQueue *hash, rspamd_fuzzy_t *s, gint update_value,
guint64 time, struct rspamd_fuzzy_storage_ctx *ctx)
{
- GList *cur;
struct rspamd_fuzzy_node *h;
- gint prob = 0;
- if (ctx->strict_hash) {
- h = g_hash_table_lookup (static_hash, s);
- if (h != NULL) {
- if (h->time == INVALID_NODE_TIME) {
- /* Node is expired */
- return NULL;
- }
- else if (update_value == 0 && time - h->time > ctx->expire) {
- h->time = INVALID_NODE_TIME;
- server_stat->fuzzy_hashes_expired++;
- return NULL;
- }
- else if (h->h.block_size== s->block_size) {
- msg_debug ("fuzzy hash was found in tree");
- if (update_value) {
- h->value += update_value;
- }
- return h;
+ h = g_hash_table_lookup (static_hash, s);
+ if (h != NULL) {
+ if (h->time == INVALID_NODE_TIME) {
+ /* Node is expired */
+ return NULL;
+ }
+ else if (update_value == 0 && time - h->time > ctx->expire) {
+ h->time = INVALID_NODE_TIME;
+ server_stat->fuzzy_hashes_expired++;
+ return NULL;
+ }
+ else if (h->h.block_size== s->block_size) {
+ msg_debug ("fuzzy hash was found in tree");
+ if (update_value) {
+ h->value += update_value;
}
- }
- }
- else {
- cur = frequent->head;
- while (cur) {
- h = cur->data;
- if ((prob = rspamd_fuzzy_compare (&h->h, s)) > LEV_LIMIT) {
- msg_info ("fuzzy hash was found, probability %d%%", prob);
- if (h->time == INVALID_NODE_TIME) {
- return NULL;
- }
- else if (update_value) {
- msg_info ("new hash weight: %d", h->value);
- h->value += update_value;
- }
- else if (time - h->time > ctx->expire) {
- h->time = INVALID_NODE_TIME;
- server_stat->fuzzy_hashes_expired++;
- return NULL;
- }
- return h;
- }
- cur = g_list_next (cur);
- }
-
- cur = hash->head;
- while (cur) {
- h = cur->data;
- if ((prob = rspamd_fuzzy_compare (&h->h, s)) > LEV_LIMIT) {
- msg_info ("fuzzy hash was found, probability %d%%", prob);
- if (h->time == INVALID_NODE_TIME) {
- return NULL;
- }
- else if (update_value) {
- msg_info ("new hash weight: %d", h->value);
- h->value += update_value;
- }
- else if (time - h->time > ctx->expire) {
- h->time = INVALID_NODE_TIME;
- server_stat->fuzzy_hashes_expired++;
- return NULL;
- }
- if (h->value > (gint)ctx->frequent_score) {
- g_queue_unlink (hash, cur);
- g_queue_push_head_link (frequent, cur);
- msg_info ("moved hash to frequent list");
- }
- return h;
- }
- cur = g_list_next (cur);
+ return h;
}
}
@@ -577,14 +445,7 @@ process_check_command (struct fuzzy_cmd *cmd,
s.block_size = cmd->blocksize;
rspamd_rwlock_reader_lock (ctx->tree_lock);
- if (ctx->strict_hash) {
- h = check_hash_node (NULL, &s, 0, time, ctx);
- }
- else {
- h =
- check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, 0, time,
- ctx);
- }
+ h = check_hash_node (NULL, &s, 0, time, ctx);
rspamd_rwlock_reader_unlock (ctx->tree_lock);
if (h == NULL) {
@@ -609,12 +470,7 @@ add_hash_node (struct fuzzy_cmd *cmd,
h->time = time;
h->value = cmd->value;
h->flag = cmd->flag;
- if (ctx->strict_hash) {
- g_hash_table_insert (static_hash, &h->h, h);
- }
- else {
- g_queue_push_head (hashes[cmd->blocksize % BUCKETS], h);
- }
+ g_hash_table_insert (static_hash, &h->h, h);
rspamd_bloom_add (bf, cmd->hash);
return h;
@@ -633,16 +489,7 @@ update_hash (struct fuzzy_cmd *cmd,
mods++;
rspamd_rwlock_writer_lock (ctx->tree_lock);
- if (ctx->strict_hash) {
- n = check_hash_node (NULL, &s, cmd->value, time, ctx);
- }
- else {
- n = check_hash_node (hashes[cmd->blocksize % BUCKETS],
- &s,
- cmd->value,
- time,
- ctx);
- }
+ n = check_hash_node (NULL, &s, cmd->value, time, ctx);
if (n == NULL) {
/* Bloom false positive */
n = add_hash_node (cmd, time, ctx);
@@ -682,43 +529,16 @@ static gboolean
delete_hash (GQueue *hash, rspamd_fuzzy_t *s,
struct rspamd_fuzzy_storage_ctx *ctx)
{
- GList *cur, *tmp;
- struct rspamd_fuzzy_node *h;
gboolean res = FALSE;
- if (ctx->strict_hash) {
- rspamd_rwlock_writer_lock (ctx->tree_lock);
- if (g_hash_table_remove (static_hash, s)) {
- rspamd_bloom_del (bf, s->hash_pipe);
- msg_info ("fuzzy hash was successfully deleted");
- server_stat->fuzzy_hashes--;
- mods++;
- }
- rspamd_rwlock_writer_unlock (ctx->tree_lock);
- }
- else {
- rspamd_rwlock_writer_lock (ctx->tree_lock);
- cur = hash->head;
-
- /* XXX: too slow way */
- while (cur) {
- h = cur->data;
- if (rspamd_fuzzy_compare (&h->h, s) > LEV_LIMIT) {
- g_slice_free1 (sizeof (struct rspamd_fuzzy_node), h);
- tmp = cur;
- cur = g_list_next (cur);
- g_queue_delete_link (hash, tmp);
- rspamd_bloom_del (bf, s->hash_pipe);
- msg_info ("fuzzy hash was successfully deleted");
- server_stat->fuzzy_hashes--;
- mods++;
- res = TRUE;
- continue;
- }
- cur = g_list_next (cur);
- }
- rspamd_rwlock_writer_unlock (ctx->tree_lock);
+ rspamd_rwlock_writer_lock (ctx->tree_lock);
+ if (g_hash_table_remove (static_hash, s)) {
+ rspamd_bloom_del (bf, s->hash_pipe);
+ msg_info ("fuzzy hash was successfully deleted");
+ server_stat->fuzzy_hashes--;
+ mods++;
}
+ rspamd_rwlock_writer_unlock (ctx->tree_lock);
return res;
@@ -730,7 +550,6 @@ process_delete_command (struct fuzzy_cmd *cmd,
struct rspamd_fuzzy_storage_ctx *ctx)
{
rspamd_fuzzy_t s;
- gboolean res = FALSE;
if (!rspamd_bloom_check (bf, cmd->hash)) {
return FALSE;
@@ -738,20 +557,8 @@ process_delete_command (struct fuzzy_cmd *cmd,
memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe));
s.block_size = cmd->blocksize;
- if (ctx->strict_hash) {
- return delete_hash (NULL, &s, ctx);
- }
- else {
- res = delete_hash (frequent, &s, ctx);
- if (!res) {
- res = delete_hash (hashes[cmd->blocksize % BUCKETS], &s, ctx);
- }
- else {
- (void)delete_hash (hashes[cmd->blocksize % BUCKETS], &s, ctx);
- }
- }
- return res;
+ return delete_hash (NULL, &s, ctx);
}
/**
@@ -977,9 +784,6 @@ init_fuzzy (struct rspamd_config *cfg)
G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx,
expire), RSPAMD_CL_FLAG_TIME_FLOAT);
- rspamd_rcl_register_worker_option (cfg, type, "strict_hash",
- rspamd_rcl_parse_struct_boolean, ctx,
- G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx, strict_hash), 0);
rspamd_rcl_register_worker_option (cfg, type, "allow_update",
rspamd_rcl_parse_struct_string, ctx,
@@ -998,7 +802,6 @@ start_fuzzy (struct rspamd_worker *worker)
struct rspamd_fuzzy_storage_ctx *ctx = worker->ctx;
GError *err = NULL;
struct rspamd_worker_signal_handler *sigh;
- gint i;
ctx->ev_base = rspamd_prepare_worker (worker,
"fuzzy",
@@ -1021,16 +824,8 @@ start_fuzzy (struct rspamd_worker *worker)
sigh->post_handler = sigterm_handler;
sigh->handler_data = worker;
- if (ctx->strict_hash) {
- static_hash = g_hash_table_new_full (rspamd_fuzzy_hash, rspamd_fuzzy_equal,
+ static_hash = g_hash_table_new_full (rspamd_fuzzy_hash, rspamd_fuzzy_equal,
NULL, rspamd_fuzzy_free_node);
- }
- else {
- for (i = 0; i < BUCKETS; i++) {
- hashes[i] = g_queue_new ();
- }
- frequent = g_queue_new ();
- }
/* Init bloom filter */
bf = rspamd_bloom_create (2000000L, RSPAMD_DEFAULT_BLOOM_HASHES);