#include "bloom.h"
#include "map.h"
#include "fuzzy_storage.h"
+#include <lmdb.h>
/* This number is used as limit while comparing two fuzzy hashes, this value can vary from 0 to 100 */
#define LEV_LIMIT 99
SOCK_DGRAM /* UDP socket */
};
-static GQueue *hashes[BUCKETS];
-static GQueue *frequent;
static GHashTable *static_hash;
static rspamd_bloom_filter_t *bf;
static struct rspamd_stat *server_stat;
struct rspamd_fuzzy_storage_ctx {
- gboolean strict_hash;
char *hashfile;
gdouble expire;
guint32 frequent_score;
{
gint i;
struct rspamd_fuzzy_node *node;
- GList *cur;
- GQueue *head;
for (i = 0; i < expired_num; i++) {
- if (ctx->strict_hash) {
- node = (struct rspamd_fuzzy_node *)to_expire[i];
- if (node->time != INVALID_NODE_TIME) {
- server_stat->fuzzy_hashes_expired++;
- }
- server_stat->fuzzy_hashes--;
- rspamd_bloom_del (bf, node->h.hash_pipe);
- g_hash_table_remove (static_hash, &node->h);
- }
- else {
- cur = (GList *)to_expire[i];
- node = (struct rspamd_fuzzy_node *)cur->data;
- head = hashes[node->h.block_size % BUCKETS];
- g_queue_delete_link (head, cur);
- rspamd_bloom_del (bf, node->h.hash_pipe);
- if (node->time != INVALID_NODE_TIME) {
- server_stat->fuzzy_hashes_expired++;
- }
- server_stat->fuzzy_hashes--;
- g_slice_free1 (sizeof(struct rspamd_fuzzy_node), node);
+ node = (struct rspamd_fuzzy_node *)to_expire[i];
+ if (node->time != INVALID_NODE_TIME) {
+ server_stat->fuzzy_hashes_expired++;
}
+ server_stat->fuzzy_hashes--;
+ rspamd_bloom_del (bf, node->h.hash_pipe);
+ g_hash_table_remove (static_hash, &node->h);
}
}
{
static const int max_expired = 8192;
struct rspamd_worker *wrk = ud;
- gint fd, i, expired_num = 0;
+ gint fd, expired_num = 0;
gchar *filename, header[4];
- GList *cur;
struct rspamd_fuzzy_node *node;
gpointer *nodes_expired = NULL;
guint64 expire, now;
goto end;
}
- if (ctx->strict_hash) {
- rspamd_rwlock_reader_lock (ctx->tree_lock);
- g_hash_table_iter_init (&iter, static_hash);
+ rspamd_rwlock_reader_lock (ctx->tree_lock);
+ g_hash_table_iter_init (&iter, static_hash);
- while (g_hash_table_iter_next (&iter, NULL, (void **)&node)) {
- if (node->time == INVALID_NODE_TIME || now - node->time >
- expire) {
- if (nodes_expired == NULL) {
- nodes_expired = g_malloc (
+ while (g_hash_table_iter_next (&iter, NULL, (void **)&node)) {
+ if (node->time == INVALID_NODE_TIME || now - node->time >
+ expire) {
+ if (nodes_expired == NULL) {
+ nodes_expired = g_malloc (
max_expired * sizeof (gpointer));
- }
-
- if (expired_num < max_expired) {
- nodes_expired[expired_num++] = node;
- }
- continue;
}
- if (write (fd, node, sizeof (struct rspamd_fuzzy_node)) == -1) {
- msg_err ("cannot write file %s: %s", filename,
- strerror (errno));
- goto end;
+
+ if (expired_num < max_expired) {
+ nodes_expired[expired_num++] = node;
}
+ continue;
}
- rspamd_rwlock_reader_unlock (ctx->tree_lock);
- }
- else {
- rspamd_rwlock_reader_lock (ctx->tree_lock);
- cur = frequent->head;
- while (cur) {
- node = cur->data;
- if (write (fd, node, sizeof(struct rspamd_fuzzy_node)) == -1) {
- msg_err ("cannot write file %s: %s", filename,
+ if (write (fd, node, sizeof (struct rspamd_fuzzy_node)) == -1) {
+ msg_err ("cannot write file %s: %s", filename,
strerror (errno));
- }
- cur = g_list_next (cur);
+ goto end;
}
- for (i = 0; i < BUCKETS; i++) {
- cur = hashes[i]->head;
- while (cur) {
- node = cur->data;
- if (now - node->time > expire) {
- if (nodes_expired == NULL) {
- nodes_expired =
- g_malloc (max_expired * sizeof (gpointer));
- }
-
- if (expired_num < max_expired) {
- nodes_expired[expired_num++] = cur;
- }
- cur = g_list_next (cur);
- continue;
- }
- if (write (fd, node,
- sizeof(struct rspamd_fuzzy_node)) == -1) {
- msg_err (
- "cannot write file %s: %s", filename,
- strerror (errno));
- goto end;
- }
- cur = g_list_next (cur);
- }
- }
- rspamd_rwlock_reader_unlock (ctx->tree_lock);
}
+ rspamd_rwlock_reader_unlock (ctx->tree_lock);
/* Now try to expire some nodes */
if (expired_num > 0) {
static gboolean
read_hashes_file (struct rspamd_worker *wrk)
{
- gint r, fd, i, version = 0;
+ gint r, fd, version = 0;
struct stat st;
gchar *filename, header[4];
gboolean touch_stat = TRUE;
break;
}
}
- if (ctx->strict_hash) {
- g_hash_table_insert (static_hash, &node->h, node);
- }
- else {
- if (node->value > (gint)ctx->frequent_score) {
- g_queue_push_head (frequent, node);
- }
- else {
- g_queue_push_head (hashes[node->h.block_size % BUCKETS], node);
- }
- }
+ g_hash_table_insert (static_hash, &node->h, node);
rspamd_bloom_add (bf, node->h.hash_pipe);
if (touch_stat) {
server_stat->fuzzy_hashes++;
}
}
- if (!ctx->strict_hash) {
- /* Sort everything */
- g_queue_sort (frequent, compare_nodes, NULL);
- for (i = 0; i < BUCKETS; i++) {
- g_queue_sort (hashes[i], compare_nodes, NULL);
- }
- }
-
(void)rspamd_file_unlock (fd, FALSE);
close (fd);
check_hash_node (GQueue *hash, rspamd_fuzzy_t *s, gint update_value,
guint64 time, struct rspamd_fuzzy_storage_ctx *ctx)
{
- GList *cur;
struct rspamd_fuzzy_node *h;
- gint prob = 0;
- if (ctx->strict_hash) {
- h = g_hash_table_lookup (static_hash, s);
- if (h != NULL) {
- if (h->time == INVALID_NODE_TIME) {
- /* Node is expired */
- return NULL;
- }
- else if (update_value == 0 && time - h->time > ctx->expire) {
- h->time = INVALID_NODE_TIME;
- server_stat->fuzzy_hashes_expired++;
- return NULL;
- }
- else if (h->h.block_size== s->block_size) {
- msg_debug ("fuzzy hash was found in tree");
- if (update_value) {
- h->value += update_value;
- }
- return h;
+ h = g_hash_table_lookup (static_hash, s);
+ if (h != NULL) {
+ if (h->time == INVALID_NODE_TIME) {
+ /* Node is expired */
+ return NULL;
+ }
+ else if (update_value == 0 && time - h->time > ctx->expire) {
+ h->time = INVALID_NODE_TIME;
+ server_stat->fuzzy_hashes_expired++;
+ return NULL;
+ }
+ else if (h->h.block_size== s->block_size) {
+ msg_debug ("fuzzy hash was found in tree");
+ if (update_value) {
+ h->value += update_value;
}
- }
- }
- else {
- cur = frequent->head;
- while (cur) {
- h = cur->data;
- if ((prob = rspamd_fuzzy_compare (&h->h, s)) > LEV_LIMIT) {
- msg_info ("fuzzy hash was found, probability %d%%", prob);
- if (h->time == INVALID_NODE_TIME) {
- return NULL;
- }
- else if (update_value) {
- msg_info ("new hash weight: %d", h->value);
- h->value += update_value;
- }
- else if (time - h->time > ctx->expire) {
- h->time = INVALID_NODE_TIME;
- server_stat->fuzzy_hashes_expired++;
- return NULL;
- }
- return h;
- }
- cur = g_list_next (cur);
- }
-
- cur = hash->head;
- while (cur) {
- h = cur->data;
- if ((prob = rspamd_fuzzy_compare (&h->h, s)) > LEV_LIMIT) {
- msg_info ("fuzzy hash was found, probability %d%%", prob);
- if (h->time == INVALID_NODE_TIME) {
- return NULL;
- }
- else if (update_value) {
- msg_info ("new hash weight: %d", h->value);
- h->value += update_value;
- }
- else if (time - h->time > ctx->expire) {
- h->time = INVALID_NODE_TIME;
- server_stat->fuzzy_hashes_expired++;
- return NULL;
- }
- if (h->value > (gint)ctx->frequent_score) {
- g_queue_unlink (hash, cur);
- g_queue_push_head_link (frequent, cur);
- msg_info ("moved hash to frequent list");
- }
- return h;
- }
- cur = g_list_next (cur);
+ return h;
}
}
s.block_size = cmd->blocksize;
rspamd_rwlock_reader_lock (ctx->tree_lock);
- if (ctx->strict_hash) {
- h = check_hash_node (NULL, &s, 0, time, ctx);
- }
- else {
- h =
- check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, 0, time,
- ctx);
- }
+ h = check_hash_node (NULL, &s, 0, time, ctx);
rspamd_rwlock_reader_unlock (ctx->tree_lock);
if (h == NULL) {
h->time = time;
h->value = cmd->value;
h->flag = cmd->flag;
- if (ctx->strict_hash) {
- g_hash_table_insert (static_hash, &h->h, h);
- }
- else {
- g_queue_push_head (hashes[cmd->blocksize % BUCKETS], h);
- }
+ g_hash_table_insert (static_hash, &h->h, h);
rspamd_bloom_add (bf, cmd->hash);
return h;
mods++;
rspamd_rwlock_writer_lock (ctx->tree_lock);
- if (ctx->strict_hash) {
- n = check_hash_node (NULL, &s, cmd->value, time, ctx);
- }
- else {
- n = check_hash_node (hashes[cmd->blocksize % BUCKETS],
- &s,
- cmd->value,
- time,
- ctx);
- }
+ n = check_hash_node (NULL, &s, cmd->value, time, ctx);
if (n == NULL) {
/* Bloom false positive */
n = add_hash_node (cmd, time, ctx);
delete_hash (GQueue *hash, rspamd_fuzzy_t *s,
struct rspamd_fuzzy_storage_ctx *ctx)
{
- GList *cur, *tmp;
- struct rspamd_fuzzy_node *h;
gboolean res = FALSE;
- if (ctx->strict_hash) {
- rspamd_rwlock_writer_lock (ctx->tree_lock);
- if (g_hash_table_remove (static_hash, s)) {
- rspamd_bloom_del (bf, s->hash_pipe);
- msg_info ("fuzzy hash was successfully deleted");
- server_stat->fuzzy_hashes--;
- mods++;
- }
- rspamd_rwlock_writer_unlock (ctx->tree_lock);
- }
- else {
- rspamd_rwlock_writer_lock (ctx->tree_lock);
- cur = hash->head;
-
- /* XXX: too slow way */
- while (cur) {
- h = cur->data;
- if (rspamd_fuzzy_compare (&h->h, s) > LEV_LIMIT) {
- g_slice_free1 (sizeof (struct rspamd_fuzzy_node), h);
- tmp = cur;
- cur = g_list_next (cur);
- g_queue_delete_link (hash, tmp);
- rspamd_bloom_del (bf, s->hash_pipe);
- msg_info ("fuzzy hash was successfully deleted");
- server_stat->fuzzy_hashes--;
- mods++;
- res = TRUE;
- continue;
- }
- cur = g_list_next (cur);
- }
- rspamd_rwlock_writer_unlock (ctx->tree_lock);
+ rspamd_rwlock_writer_lock (ctx->tree_lock);
+ if (g_hash_table_remove (static_hash, s)) {
+ rspamd_bloom_del (bf, s->hash_pipe);
+ msg_info ("fuzzy hash was successfully deleted");
+ server_stat->fuzzy_hashes--;
+ mods++;
}
+ rspamd_rwlock_writer_unlock (ctx->tree_lock);
return res;
struct rspamd_fuzzy_storage_ctx *ctx)
{
rspamd_fuzzy_t s;
- gboolean res = FALSE;
if (!rspamd_bloom_check (bf, cmd->hash)) {
return FALSE;
memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe));
s.block_size = cmd->blocksize;
- if (ctx->strict_hash) {
- return delete_hash (NULL, &s, ctx);
- }
- else {
- res = delete_hash (frequent, &s, ctx);
- if (!res) {
- res = delete_hash (hashes[cmd->blocksize % BUCKETS], &s, ctx);
- }
- else {
- (void)delete_hash (hashes[cmd->blocksize % BUCKETS], &s, ctx);
- }
- }
- return res;
+ return delete_hash (NULL, &s, ctx);
}
/**
G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx,
expire), RSPAMD_CL_FLAG_TIME_FLOAT);
- rspamd_rcl_register_worker_option (cfg, type, "strict_hash",
- rspamd_rcl_parse_struct_boolean, ctx,
- G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx, strict_hash), 0);
rspamd_rcl_register_worker_option (cfg, type, "allow_update",
rspamd_rcl_parse_struct_string, ctx,
struct rspamd_fuzzy_storage_ctx *ctx = worker->ctx;
GError *err = NULL;
struct rspamd_worker_signal_handler *sigh;
- gint i;
ctx->ev_base = rspamd_prepare_worker (worker,
"fuzzy",
sigh->post_handler = sigterm_handler;
sigh->handler_data = worker;
- if (ctx->strict_hash) {
- static_hash = g_hash_table_new_full (rspamd_fuzzy_hash, rspamd_fuzzy_equal,
+ static_hash = g_hash_table_new_full (rspamd_fuzzy_hash, rspamd_fuzzy_equal,
NULL, rspamd_fuzzy_free_node);
- }
- else {
- for (i = 0; i < BUCKETS; i++) {
- hashes[i] = g_queue_new ();
- }
- frequent = g_queue_new ();
- }
/* Init bloom filter */
bf = rspamd_bloom_create (2000000L, RSPAMD_DEFAULT_BLOOM_HASHES);