]> source.dussan.org Git - rspamd.git/commitdiff
Fix fuzzy expiration.
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 26 Dec 2013 16:03:50 +0000 (16:03 +0000)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 26 Dec 2013 16:03:50 +0000 (16:03 +0000)
Reported by: AL

src/fuzzy_storage.c

index 1f47b0dde1e19cfdb9a08f8621500545c6e2c1c8..51388dd3ef2ed56ba8d347c3b0b137b13aa75382 100644 (file)
@@ -63,6 +63,8 @@
 /* Current version of fuzzy hash file format */
 #define CURRENT_FUZZY_VERSION 1
 
+#define INVALID_NODE_TIME (guint64)-1
+
 /* Init functions */
 gpointer init_fuzzy (struct config_file *cfg);
 void start_fuzzy (struct rspamd_worker *worker);
@@ -121,6 +123,7 @@ struct fuzzy_session {
        gint fd;
        u_char *pos;
        socklen_t salen;
+       guint64 time;
        union {
                struct sockaddr ss;
                struct sockaddr_storage sa;
@@ -173,23 +176,15 @@ expire_nodes (gpointer *to_expire, gint expired_num,
 
        for (i = 0; i < expired_num; i ++) {
 #ifdef WITH_JUDY
-               PPvoid_t                        pvalue;
-               gpointer                        data;
-
                if (ctx->use_judy) {
                        node = (struct rspamd_fuzzy_node *)to_expire[i];
+                       if (node->time != INVALID_NODE_TIME) {
+                               server_stat->fuzzy_hashes_expired ++;
+                       }
+                       server_stat->fuzzy_hashes --;
                        JudySLDel (&jtree, node->h.hash_pipe, PJE0);
                        bloom_del (bf, node->h.hash_pipe);
                        g_slice_free1 (sizeof (struct rspamd_fuzzy_node), node);
-
-                       pvalue = JudySLGet (jtree, node->h.hash_pipe, PJE0);
-                       if (pvalue) {
-                               data = *pvalue;
-                               JudySLDel (&jtree, node->h.hash_pipe, PJE0);
-                               g_slice_free1 (sizeof (struct rspamd_fuzzy_node), data);
-                               server_stat->fuzzy_hashes_expired ++;
-                               server_stat->fuzzy_hashes --;
-                       }
                }
                else {
 #endif
@@ -198,7 +193,9 @@ expire_nodes (gpointer *to_expire, gint expired_num,
                head = hashes[node->h.block_size % BUCKETS];
                g_queue_delete_link (head, cur);
                bloom_del (bf, node->h.hash_pipe);
-               server_stat->fuzzy_hashes_expired++;
+               if (node->time != INVALID_NODE_TIME) {
+                       server_stat->fuzzy_hashes_expired ++;
+               }
                server_stat->fuzzy_hashes--;
                g_slice_free1 (sizeof(struct rspamd_fuzzy_node), node);
 #ifdef WITH_JUDY
@@ -277,7 +274,7 @@ sync_cache (gpointer ud)
                        pvalue = JudySLFirst (jtree, indexbuf, PJE0);
                        while (pvalue) {
                                node = *((struct rspamd_fuzzy_node **)pvalue);
-                               if (now - node->time > expire) {
+                               if (node->time == INVALID_NODE_TIME || now - node->time > expire) {
                                        if (nodes_expired == NULL) {
                                                nodes_expired = g_malloc (max_expired * sizeof (gpointer));
                                        }
@@ -558,7 +555,8 @@ read_hashes_file (struct rspamd_worker *wrk)
 }
 
 static inline struct rspamd_fuzzy_node *
-check_hash_node (GQueue *hash, fuzzy_hash_t *s, gint update_value, struct rspamd_fuzzy_storage_ctx *ctx)
+check_hash_node (GQueue *hash, fuzzy_hash_t *s, gint update_value,
+               guint64 time, struct rspamd_fuzzy_storage_ctx *ctx)
 {
        GList                          *cur;
        struct rspamd_fuzzy_node       *h;
@@ -571,8 +569,17 @@ check_hash_node (GQueue *hash, fuzzy_hash_t *s, gint update_value, struct rspamd
                pvalue = JudySLGet (jtree, s->hash_pipe, PJE0);
                if (pvalue != NULL) {
                        h = *((struct rspamd_fuzzy_node **)pvalue);
-                       /* Also check block size */
-                       if (h->h.block_size== s->block_size) {
+
+                       if (h->time == INVALID_NODE_TIME) {
+                               /* Node is expired */
+                               return NULL;
+                       }
+                       else if (update_value == 0 && time - h->time > ctx->expire) {
+                               h->time = INVALID_NODE_TIME;
+                               server_stat->fuzzy_hashes_expired ++;
+                               return NULL;
+                       }
+                       else if (h->h.block_size== s->block_size) {
                                msg_info ("fuzzy hash was found in judy tree");
                                if (update_value) {
                                        h->value += update_value;
@@ -588,10 +595,18 @@ check_hash_node (GQueue *hash, fuzzy_hash_t *s, gint update_value, struct rspamd
                h = cur->data;
                if ((prob = fuzzy_compare_hashes (&h->h, s)) > LEV_LIMIT) {
                        msg_info ("fuzzy hash was found, probability %d%%", prob);
-                       if (update_value) {
+                       if (h->time == INVALID_NODE_TIME) {
+                               return NULL;
+                       }
+                       else if (update_value) {
                                msg_info ("new hash weight: %d", h->value);
                                h->value += update_value;
                        }
+                       else if (time - h->time > ctx->expire) {
+                               h->time = INVALID_NODE_TIME;
+                               server_stat->fuzzy_hashes_expired ++;
+                               return NULL;
+                       }
                        return h;
                }
                cur = g_list_next (cur);
@@ -602,9 +617,17 @@ check_hash_node (GQueue *hash, fuzzy_hash_t *s, gint update_value, struct rspamd
                h = cur->data;
                if ((prob = fuzzy_compare_hashes (&h->h, s)) > LEV_LIMIT) {
                        msg_info ("fuzzy hash was found, probability %d%%", prob);
-                       if (update_value) {
-                               h->value += update_value;
+                       if (h->time == INVALID_NODE_TIME) {
+                               return NULL;
+                       }
+                       else if (update_value) {
                                msg_info ("new hash weight: %d", h->value);
+                               h->value += update_value;
+                       }
+                       else if (time - h->time > ctx->expire) {
+                               h->time = INVALID_NODE_TIME;
+                               server_stat->fuzzy_hashes_expired ++;
+                               return NULL;
                        }
                        if (h->value > (gint)ctx->frequent_score) {
                                g_queue_unlink (hash, cur);
@@ -623,7 +646,7 @@ check_hash_node (GQueue *hash, fuzzy_hash_t *s, gint update_value, struct rspamd
 }
 
 static                          gint
-process_check_command (struct fuzzy_cmd *cmd, gint *flag, struct rspamd_fuzzy_storage_ctx *ctx)
+process_check_command (struct fuzzy_cmd *cmd, gint *flag, guint64 time, struct rspamd_fuzzy_storage_ctx *ctx)
 {
        fuzzy_hash_t                    s;
        struct rspamd_fuzzy_node       *h;
@@ -637,7 +660,7 @@ process_check_command (struct fuzzy_cmd *cmd, gint *flag, struct rspamd_fuzzy_st
        s.block_size = cmd->blocksize;
 
        rspamd_rwlock_reader_lock (ctx->tree_lock);
-       h = check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, 0, ctx);
+       h = check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, 0, time, ctx);
        rspamd_rwlock_reader_unlock (ctx->tree_lock);
 
        if (h == NULL) {
@@ -650,24 +673,28 @@ process_check_command (struct fuzzy_cmd *cmd, gint *flag, struct rspamd_fuzzy_st
 }
 
 static                          gboolean
-update_hash (struct fuzzy_cmd *cmd, struct rspamd_fuzzy_storage_ctx *ctx)
+update_hash (struct fuzzy_cmd *cmd, guint64 time, struct rspamd_fuzzy_storage_ctx *ctx)
 {
        fuzzy_hash_t                    s;
-       gboolean                        r;
+       struct rspamd_fuzzy_node      *n;
 
        memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe));
        s.block_size = cmd->blocksize;
        mods ++;
 
        rspamd_rwlock_writer_lock (ctx->tree_lock);
-       r = check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, cmd->value, ctx) != NULL;
+       n = check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, cmd->value, time, ctx);
        rspamd_rwlock_writer_unlock (ctx->tree_lock);
 
-       return r;
+       if (n != NULL) {
+               n->time = time;
+               return TRUE;
+       }
+       return FALSE;
 }
 
 static                          gboolean
-process_write_command (struct fuzzy_cmd *cmd, struct rspamd_fuzzy_storage_ctx *ctx)
+process_write_command (struct fuzzy_cmd *cmd, guint64 time, struct rspamd_fuzzy_storage_ctx *ctx)
 {
        struct rspamd_fuzzy_node       *h;
 #ifdef WITH_JUDY
@@ -675,7 +702,7 @@ process_write_command (struct fuzzy_cmd *cmd, struct rspamd_fuzzy_storage_ctx *c
 #endif
 
        if (bloom_check (bf, cmd->hash)) {
-               if (update_hash (cmd, ctx)) {
+               if (update_hash (cmd, time, ctx)) {
                        return TRUE;
                }
        }
@@ -684,7 +711,7 @@ process_write_command (struct fuzzy_cmd *cmd, struct rspamd_fuzzy_storage_ctx *c
        h = g_slice_alloc (sizeof (struct rspamd_fuzzy_node));
        memcpy (&h->h.hash_pipe, &cmd->hash, sizeof (cmd->hash));
        h->h.block_size = cmd->blocksize;
-       h->time = (guint64) time (NULL);
+       h->time = time;
        h->value = cmd->value;
        h->flag = cmd->flag;
 #ifdef WITH_JUDY
@@ -764,7 +791,7 @@ delete_hash (GQueue *hash, fuzzy_hash_t *s, struct rspamd_fuzzy_storage_ctx *ctx
 }
 
 static                          gboolean
-process_delete_command (struct fuzzy_cmd *cmd, struct rspamd_fuzzy_storage_ctx *ctx)
+process_delete_command (struct fuzzy_cmd *cmd, guint64 time, struct rspamd_fuzzy_storage_ctx *ctx)
 {
        fuzzy_hash_t                    s;
        gboolean                        res = FALSE;
@@ -817,7 +844,7 @@ check_fuzzy_client (struct fuzzy_session *session)
 
 #define CMD_PROCESS(x)                                                                                                                                                 \
 do {                                                                                                                                                                                   \
-if (process_##x##_command (&session->cmd, session->worker->ctx)) {                                                                                                     \
+if (process_##x##_command (&session->cmd, session->time, session->worker->ctx)) {                              \
        if (sendto (session->fd, "OK" CRLF, sizeof ("OK" CRLF) - 1, 0, &session->client_addr.ss, session->salen) == -1) {                                                       \
                msg_err ("error while writing reply: %s", strerror (errno));            \
        }                                                                                                                                                                                       \
@@ -837,7 +864,7 @@ process_fuzzy_command (struct fuzzy_session *session)
 
        switch (session->cmd.cmd) {
        case FUZZY_CHECK:
-               r = process_check_command (&session->cmd, &flag, session->worker->ctx);
+               r = process_check_command (&session->cmd, &flag, session->time, session->worker->ctx);
                if (r != 0) {
                        r = rspamd_snprintf (buf, sizeof (buf), "OK %d %d" CRLF, r, flag);
                        if (sendto (session->fd, buf, r, 0,
@@ -910,6 +937,7 @@ accept_fuzzy_socket (gint fd, short what, void *arg)
        session.pos = (u_char *) & session.cmd;
        session.salen = sizeof (session.client_addr);
        session.ctx = worker->ctx;
+       session.time = (guint64)time (NULL);
 
        /* Got some data */
        if (what == EV_READ) {