From 8f5ce03d72a90cbbef5089f97ec387a7c915faff Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 26 Dec 2013 16:03:50 +0000 Subject: [PATCH] Fix fuzzy expiration. Reported by: AL --- src/fuzzy_storage.c | 92 +++++++++++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 32 deletions(-) diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c index 1f47b0dde..51388dd3e 100644 --- a/src/fuzzy_storage.c +++ b/src/fuzzy_storage.c @@ -63,6 +63,8 @@ /* Current version of fuzzy hash file format */ #define CURRENT_FUZZY_VERSION 1 +#define INVALID_NODE_TIME (guint64)-1 + /* Init functions */ gpointer init_fuzzy (struct config_file *cfg); void start_fuzzy (struct rspamd_worker *worker); @@ -121,6 +123,7 @@ struct fuzzy_session { gint fd; u_char *pos; socklen_t salen; + guint64 time; union { struct sockaddr ss; struct sockaddr_storage sa; @@ -173,23 +176,15 @@ expire_nodes (gpointer *to_expire, gint expired_num, for (i = 0; i < expired_num; i ++) { #ifdef WITH_JUDY - PPvoid_t pvalue; - gpointer data; - if (ctx->use_judy) { node = (struct rspamd_fuzzy_node *)to_expire[i]; + if (node->time != INVALID_NODE_TIME) { + server_stat->fuzzy_hashes_expired ++; + } + server_stat->fuzzy_hashes --; JudySLDel (&jtree, node->h.hash_pipe, PJE0); bloom_del (bf, node->h.hash_pipe); g_slice_free1 (sizeof (struct rspamd_fuzzy_node), node); - - pvalue = JudySLGet (jtree, node->h.hash_pipe, PJE0); - if (pvalue) { - data = *pvalue; - JudySLDel (&jtree, node->h.hash_pipe, PJE0); - g_slice_free1 (sizeof (struct rspamd_fuzzy_node), data); - server_stat->fuzzy_hashes_expired ++; - server_stat->fuzzy_hashes --; - } } else { #endif @@ -198,7 +193,9 @@ expire_nodes (gpointer *to_expire, gint expired_num, head = hashes[node->h.block_size % BUCKETS]; g_queue_delete_link (head, cur); bloom_del (bf, node->h.hash_pipe); - server_stat->fuzzy_hashes_expired++; + if (node->time != INVALID_NODE_TIME) { + server_stat->fuzzy_hashes_expired ++; + } server_stat->fuzzy_hashes--; g_slice_free1 (sizeof(struct rspamd_fuzzy_node), node); #ifdef WITH_JUDY @@ -277,7 +274,7 @@ sync_cache (gpointer ud) pvalue = JudySLFirst (jtree, indexbuf, PJE0); while (pvalue) { node = *((struct rspamd_fuzzy_node **)pvalue); - if (now - node->time > expire) { + if (node->time == INVALID_NODE_TIME || now - node->time > expire) { if (nodes_expired == NULL) { nodes_expired = g_malloc (max_expired * sizeof (gpointer)); } @@ -558,7 +555,8 @@ read_hashes_file (struct rspamd_worker *wrk) } static inline struct rspamd_fuzzy_node * -check_hash_node (GQueue *hash, fuzzy_hash_t *s, gint update_value, struct rspamd_fuzzy_storage_ctx *ctx) +check_hash_node (GQueue *hash, fuzzy_hash_t *s, gint update_value, + guint64 time, struct rspamd_fuzzy_storage_ctx *ctx) { GList *cur; struct rspamd_fuzzy_node *h; @@ -571,8 +569,17 @@ check_hash_node (GQueue *hash, fuzzy_hash_t *s, gint update_value, struct rspamd pvalue = JudySLGet (jtree, s->hash_pipe, PJE0); if (pvalue != NULL) { h = *((struct rspamd_fuzzy_node **)pvalue); - /* Also check block size */ - if (h->h.block_size== s->block_size) { + + if (h->time == INVALID_NODE_TIME) { + /* Node is expired */ + return NULL; + } + else if (update_value == 0 && time - h->time > ctx->expire) { + h->time = INVALID_NODE_TIME; + server_stat->fuzzy_hashes_expired ++; + return NULL; + } + else if (h->h.block_size== s->block_size) { msg_info ("fuzzy hash was found in judy tree"); if (update_value) { h->value += update_value; @@ -588,10 +595,18 @@ check_hash_node (GQueue *hash, fuzzy_hash_t *s, gint update_value, struct rspamd h = cur->data; if ((prob = fuzzy_compare_hashes (&h->h, s)) > LEV_LIMIT) { msg_info ("fuzzy hash was found, probability %d%%", prob); - if (update_value) { + if (h->time == INVALID_NODE_TIME) { + return NULL; + } + else if (update_value) { msg_info ("new hash weight: %d", h->value); h->value += update_value; } + else if (time - h->time > ctx->expire) { + h->time = INVALID_NODE_TIME; + server_stat->fuzzy_hashes_expired ++; + return NULL; + } return h; } cur = g_list_next (cur); @@ -602,9 +617,17 @@ check_hash_node (GQueue *hash, fuzzy_hash_t *s, gint update_value, struct rspamd h = cur->data; if ((prob = fuzzy_compare_hashes (&h->h, s)) > LEV_LIMIT) { msg_info ("fuzzy hash was found, probability %d%%", prob); - if (update_value) { - h->value += update_value; + if (h->time == INVALID_NODE_TIME) { + return NULL; + } + else if (update_value) { msg_info ("new hash weight: %d", h->value); + h->value += update_value; + } + else if (time - h->time > ctx->expire) { + h->time = INVALID_NODE_TIME; + server_stat->fuzzy_hashes_expired ++; + return NULL; } if (h->value > (gint)ctx->frequent_score) { g_queue_unlink (hash, cur); @@ -623,7 +646,7 @@ check_hash_node (GQueue *hash, fuzzy_hash_t *s, gint update_value, struct rspamd } static gint -process_check_command (struct fuzzy_cmd *cmd, gint *flag, struct rspamd_fuzzy_storage_ctx *ctx) +process_check_command (struct fuzzy_cmd *cmd, gint *flag, guint64 time, struct rspamd_fuzzy_storage_ctx *ctx) { fuzzy_hash_t s; struct rspamd_fuzzy_node *h; @@ -637,7 +660,7 @@ process_check_command (struct fuzzy_cmd *cmd, gint *flag, struct rspamd_fuzzy_st s.block_size = cmd->blocksize; rspamd_rwlock_reader_lock (ctx->tree_lock); - h = check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, 0, ctx); + h = check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, 0, time, ctx); rspamd_rwlock_reader_unlock (ctx->tree_lock); if (h == NULL) { @@ -650,24 +673,28 @@ process_check_command (struct fuzzy_cmd *cmd, gint *flag, struct rspamd_fuzzy_st } static gboolean -update_hash (struct fuzzy_cmd *cmd, struct rspamd_fuzzy_storage_ctx *ctx) +update_hash (struct fuzzy_cmd *cmd, guint64 time, struct rspamd_fuzzy_storage_ctx *ctx) { fuzzy_hash_t s; - gboolean r; + struct rspamd_fuzzy_node *n; memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe)); s.block_size = cmd->blocksize; mods ++; rspamd_rwlock_writer_lock (ctx->tree_lock); - r = check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, cmd->value, ctx) != NULL; + n = check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, cmd->value, time, ctx); rspamd_rwlock_writer_unlock (ctx->tree_lock); - return r; + if (n != NULL) { + n->time = time; + return TRUE; + } + return FALSE; } static gboolean -process_write_command (struct fuzzy_cmd *cmd, struct rspamd_fuzzy_storage_ctx *ctx) +process_write_command (struct fuzzy_cmd *cmd, guint64 time, struct rspamd_fuzzy_storage_ctx *ctx) { struct rspamd_fuzzy_node *h; #ifdef WITH_JUDY @@ -675,7 +702,7 @@ process_write_command (struct fuzzy_cmd *cmd, struct rspamd_fuzzy_storage_ctx *c #endif if (bloom_check (bf, cmd->hash)) { - if (update_hash (cmd, ctx)) { + if (update_hash (cmd, time, ctx)) { return TRUE; } } @@ -684,7 +711,7 @@ process_write_command (struct fuzzy_cmd *cmd, struct rspamd_fuzzy_storage_ctx *c h = g_slice_alloc (sizeof (struct rspamd_fuzzy_node)); memcpy (&h->h.hash_pipe, &cmd->hash, sizeof (cmd->hash)); h->h.block_size = cmd->blocksize; - h->time = (guint64) time (NULL); + h->time = time; h->value = cmd->value; h->flag = cmd->flag; #ifdef WITH_JUDY @@ -764,7 +791,7 @@ delete_hash (GQueue *hash, fuzzy_hash_t *s, struct rspamd_fuzzy_storage_ctx *ctx } static gboolean -process_delete_command (struct fuzzy_cmd *cmd, struct rspamd_fuzzy_storage_ctx *ctx) +process_delete_command (struct fuzzy_cmd *cmd, guint64 time, struct rspamd_fuzzy_storage_ctx *ctx) { fuzzy_hash_t s; gboolean res = FALSE; @@ -817,7 +844,7 @@ check_fuzzy_client (struct fuzzy_session *session) #define CMD_PROCESS(x) \ do { \ -if (process_##x##_command (&session->cmd, session->worker->ctx)) { \ +if (process_##x##_command (&session->cmd, session->time, session->worker->ctx)) { \ if (sendto (session->fd, "OK" CRLF, sizeof ("OK" CRLF) - 1, 0, &session->client_addr.ss, session->salen) == -1) { \ msg_err ("error while writing reply: %s", strerror (errno)); \ } \ @@ -837,7 +864,7 @@ process_fuzzy_command (struct fuzzy_session *session) switch (session->cmd.cmd) { case FUZZY_CHECK: - r = process_check_command (&session->cmd, &flag, session->worker->ctx); + r = process_check_command (&session->cmd, &flag, session->time, session->worker->ctx); if (r != 0) { r = rspamd_snprintf (buf, sizeof (buf), "OK %d %d" CRLF, r, flag); if (sendto (session->fd, buf, r, 0, @@ -910,6 +937,7 @@ accept_fuzzy_socket (gint fd, short what, void *arg) session.pos = (u_char *) & session.cmd; session.salen = sizeof (session.client_addr); session.ctx = worker->ctx; + session.time = (guint64)time (NULL); /* Got some data */ if (what == EV_READ) { -- 2.39.5