]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Allow to have weak flags in fuzzy storage
authorVsevolod Stakhov <vsevolod@rspamd.com>
Sat, 20 May 2023 12:19:24 +0000 (13:19 +0100)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Sat, 20 May 2023 12:19:24 +0000 (13:19 +0100)
src/fuzzy_storage.c
src/libserver/fuzzy_backend/fuzzy_backend_redis.c
src/libserver/fuzzy_wire.h

index 8a7a3de4e83bcf42d10fda92ec13f2d3569ec902..fe916088021b4379acc1e75a985f0f582cb3fb30 100644 (file)
@@ -107,8 +107,7 @@ struct rspamd_leaky_bucket_elt {
 };
 
 static const guint64 rspamd_fuzzy_storage_magic = 0x291a3253eb1b3ea5ULL;
-KHASH_SET_INIT_INT(fuzzy_key_forbidden_ids);
-
+KHASH_SET_INIT_INT(fuzzy_key_ids_set);
 
 struct rspamd_fuzzy_storage_ctx {
        guint64 magic;
@@ -171,7 +170,9 @@ struct rspamd_fuzzy_storage_ctx {
        gint lua_pre_handler_cbref;
        gint lua_post_handler_cbref;
        gint lua_blacklist_cbref;
-       khash_t(fuzzy_key_forbidden_ids) *default_forbidden_ids;
+       khash_t(fuzzy_key_ids_set) *default_forbidden_ids;
+       /* Ids that should not override other ids */
+       khash_t(fuzzy_key_ids_set) *weak_ids;
 };
 
 enum fuzzy_cmd_type {
@@ -213,7 +214,7 @@ struct fuzzy_key {
        struct rspamd_cryptobox_pubkey *pk;
        struct fuzzy_key_stat *stat;
        khash_t(fuzzy_key_flag_stat) *flags_stat;
-       khash_t(fuzzy_key_forbidden_ids) *forbidden_ids;
+       khash_t(fuzzy_key_ids_set) *forbidden_ids;
 };
 
 struct rspamd_updates_cbdata {
@@ -449,7 +450,7 @@ fuzzy_key_dtor (gpointer p)
                }
 
                if (key->forbidden_ids) {
-                       kh_destroy(fuzzy_key_forbidden_ids, key->forbidden_ids);
+                       kh_destroy(fuzzy_key_ids_set, key->forbidden_ids);
                }
 
                g_free (key);
@@ -812,7 +813,7 @@ rspamd_fuzzy_make_reply (struct rspamd_fuzzy_cmd *cmd,
                {
                        khiter_t k;
 
-                       k = kh_get(fuzzy_key_forbidden_ids, session->ctx->default_forbidden_ids, session->reply.rep.v1.flag);
+                       k = kh_get(fuzzy_key_ids_set, session->ctx->default_forbidden_ids, session->reply.rep.v1.flag);
 
                        if (k != kh_end(session->ctx->default_forbidden_ids)) {
                                /* Hash is from a forbidden flag by default */
@@ -825,7 +826,7 @@ rspamd_fuzzy_make_reply (struct rspamd_fuzzy_cmd *cmd,
                        if (session->reply.rep.v1.prob > 0 && session->key && session->key->forbidden_ids) {
                                khiter_t k;
 
-                               k = kh_get(fuzzy_key_forbidden_ids, session->key->forbidden_ids, session->reply.rep.v1.flag);
+                               k = kh_get(fuzzy_key_ids_set, session->key->forbidden_ids, session->reply.rep.v1.flag);
 
                                if (k != kh_end (session->key->forbidden_ids)) {
                                        /* Hash is from a forbidden flag for this key */
@@ -1320,6 +1321,11 @@ rspamd_fuzzy_process_command (struct fuzzy_session *session)
                                }
                        }
 
+                       if (session->ctx->weak_ids && kh_get(fuzzy_key_ids_set, session->ctx->weak_ids, cmd->flag) != kh_end(session->ctx->weak_ids)) {
+                               /* Flag command as weak */
+                               cmd->version |= RSPAMD_FUZZY_FLAG_WEAK;
+                       }
+
                        if (session->worker->index == 0 || session->ctx->peer_fd == -1) {
                                /* Just add to the queue */
                                up_cmd.is_shingle = is_shingle;
@@ -1367,7 +1373,7 @@ rspamd_fuzzy_command_valid (struct rspamd_fuzzy_cmd *cmd, gint r)
 {
        enum rspamd_fuzzy_epoch ret = RSPAMD_FUZZY_EPOCH_MAX;
 
-       switch (cmd->version) {
+       switch (cmd->version & RSPAMD_FUZZY_VERSION_MASK) {
        case 4:
                if (cmd->shingles_count > 0) {
                        if (r >= sizeof (struct rspamd_fuzzy_shingle_cmd)) {
@@ -2281,16 +2287,16 @@ rspamd_fuzzy_storage_stat (struct rspamd_main *rspamd_main,
 }
 
 static gboolean
-       fuzzy_parse_forbidden_ids (rspamd_mempool_t *pool,
+fuzzy_parse_ids (rspamd_mempool_t *pool,
                const ucl_object_t *obj,
                gpointer ud,
                struct rspamd_rcl_section *section,
                GError **err)
 {
        struct rspamd_rcl_struct_parser *pd = (struct rspamd_rcl_struct_parser *)ud;
-       struct rspamd_fuzzy_storage_ctx *ctx;
+       khash_t(fuzzy_key_ids_set) *target;
 
-       ctx = (struct rspamd_fuzzy_storage_ctx *)pd->user_struct;
+       target = (khash_t(fuzzy_key_ids_set) *)pd->user_struct;
 
        if (ucl_object_type (obj) == UCL_ARRAY) {
                const ucl_object_t *cur;
@@ -2301,7 +2307,7 @@ static gboolean
                        if (ucl_object_toint_safe (cur, &id)) {
                                int r;
 
-                               kh_put(fuzzy_key_forbidden_ids, ctx->default_forbidden_ids, id, &r);
+                               kh_put(fuzzy_key_ids_set, target, id, &r);
                        }
                        else {
                                return FALSE;
@@ -2312,7 +2318,7 @@ static gboolean
        }
        else if (ucl_object_type (obj) == UCL_INT) {
                int r;
-               kh_put(fuzzy_key_forbidden_ids, ctx->default_forbidden_ids, ucl_object_toint (obj), &r);
+               kh_put(fuzzy_key_ids_set, target, ucl_object_toint (obj), &r);
 
                return TRUE;
        }
@@ -2389,13 +2395,13 @@ fuzzy_parse_keypair (rspamd_mempool_t *pool,
                        const ucl_object_t *forbidden_ids = ucl_object_lookup (extensions, "forbidden_ids");
 
                        if (forbidden_ids && ucl_object_type (forbidden_ids) == UCL_ARRAY) {
-                               key->forbidden_ids = kh_init(fuzzy_key_forbidden_ids);
+                               key->forbidden_ids = kh_init(fuzzy_key_ids_set);
                                while ((cur = ucl_object_iterate (forbidden_ids, &it, true)) != NULL) {
                                        if (ucl_object_type(cur) == UCL_INT || ucl_object_type(cur) == UCL_FLOAT) {
                                                int id = ucl_object_toint(cur);
                                                int r;
 
-                                               kh_put(fuzzy_key_forbidden_ids, key->forbidden_ids, id, &r);
+                                               kh_put(fuzzy_key_ids_set, key->forbidden_ids, id, &r);
                                        }
                                }
                        }
@@ -2462,7 +2468,8 @@ init_fuzzy (struct rspamd_config *cfg)
        ctx->leaky_bucket_burst = NAN;
        ctx->leaky_bucket_rate = NAN;
        ctx->delay = NAN;
-       ctx->default_forbidden_ids = kh_init(fuzzy_key_forbidden_ids);
+       ctx->default_forbidden_ids = kh_init(fuzzy_key_ids_set);
+       ctx->weak_ids = kh_init(fuzzy_key_ids_set);
 
        rspamd_rcl_register_worker_option (cfg,
                        type,
@@ -2535,12 +2542,21 @@ init_fuzzy (struct rspamd_config *cfg)
        rspamd_rcl_register_worker_option (cfg,
                        type,
                        "forbidden_ids",
-                       fuzzy_parse_forbidden_ids,
-                       ctx,
+                       fuzzy_parse_ids,
+                       ctx->default_forbidden_ids,
                        0,
                        0,
                        "Deny specific flags by default");
 
+       rspamd_rcl_register_worker_option (cfg,
+               type,
+               "weak_ids",
+               fuzzy_parse_ids,
+               ctx->weak_ids,
+               0,
+               0,
+               "Treat these flags as weak (i.e. they do not overwrite strong flags)");
+
        rspamd_rcl_register_worker_option (cfg,
                        type,
                        "keypair_cache_size",
@@ -3019,7 +3035,11 @@ start_fuzzy (struct rspamd_worker *worker)
        }
 
        if (ctx->default_forbidden_ids) {
-               kh_destroy(fuzzy_key_forbidden_ids, ctx->default_forbidden_ids);
+               kh_destroy(fuzzy_key_ids_set, ctx->default_forbidden_ids);
+       }
+
+       if (ctx->weak_ids) {
+               kh_destroy(fuzzy_key_ids_set, ctx->weak_ids);
        }
 
        REF_RELEASE (ctx->cfg);
index 0b7953bfe9f184fe763e3cfd2a609e16910a309f..390119fbc0012efef0218078b40d6f319130f949 100644 (file)
@@ -1049,8 +1049,16 @@ rspamd_fuzzy_update_append_command (struct rspamd_fuzzy_backend *bk,
                g_string_append_len (key, cmd->digest, sizeof (cmd->digest));
                value = g_string_sized_new (sizeof ("4294967296"));
                rspamd_printf_gstring (value, "%d", cmd->flag);
-               session->argv[cur_shift] = g_strdup ("HSET");
-               session->argv_lens[cur_shift++] = sizeof ("HSET") - 1;
+
+               if (cmd->version & RSPAMD_FUZZY_FLAG_WEAK) {
+                       session->argv[cur_shift] = g_strdup ("HSETNX");
+                       session->argv_lens[cur_shift++] = sizeof ("HSETNX") - 1;
+               }
+               else {
+                       session->argv[cur_shift] = g_strdup ("HSET");
+                       session->argv_lens[cur_shift++] = sizeof ("HSET") - 1;
+               }
+
                session->argv[cur_shift] = key->str;
                session->argv_lens[cur_shift++] = key->len;
                session->argv[cur_shift] = g_strdup ("F");
@@ -1445,7 +1453,7 @@ rspamd_fuzzy_backend_update_redis (struct rspamd_fuzzy_backend *bk,
 
        /*
         * For each normal hash addition we do 3 redis commands:
-        * HSET <key> F <flag>
+        * HSET <key> F <flag> **OR** HSETNX <key> F <flag> when flag is weak
         * HINCRBY <key> V <weight>
         * EXPIRE <key> <expire>
         * INCR <prefix||fuzzy_count>
index 8680e16fbd6cbc000c89b99c2ba3f6ce9396223d..fd65ffb38ef41159d41c065bc74890a94c2f686b 100644 (file)
@@ -13,6 +13,9 @@ extern "C" {
 #define RSPAMD_FUZZY_VERSION 4
 #define RSPAMD_FUZZY_KEYLEN 8
 
+#define RSPAMD_FUZZY_FLAG_WEAK (1u << 7u)
+/* Use lower 4 bits for the version */
+#define RSPAMD_FUZZY_VERSION_MASK 0x0fu
 /* Commands for fuzzy storage */
 #define FUZZY_CHECK 0
 #define FUZZY_WRITE 1