about | summary | refs | log | tree | commit | diff | stats
path: root/src/fuzzy_storage.c
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@rspamd.com> 2024-02-14 16:24:46 +0000
committer: Vsevolod Stakhov <vsevolod@rspamd.com> 2024-02-14 16:24:46 +0000
commit: c5da2ff5e9eaf9410e25c0bb36c4833547e5103f (patch)
tree: de05769d5230a00d34c1da43c31924dbac6d9ae1 /src/fuzzy_storage.c
parent: 64c5a15ddd56ced2f87cebcfa66b2847d5ae860b (diff)
download: rspamd-c5da2ff5e9eaf9410e25c0bb36c4833547e5103f.tar.gz
rspamd-c5da2ff5e9eaf9410e25c0bb36c4833547e5103f.zip
[Rework] Use khash instead of glib hashes for many reasons
Diffstat (limited to 'src/fuzzy_storage.c')
-rw-r--r-- src/fuzzy_storage.c 209
1 files changed, 125 insertions, 84 deletions
diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c
index d42dffdce..7307dcb4a 100644
--- a/src/fuzzy_storage.c
+++ b/src/fuzzy_storage.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2023 Vsevolod Stakhov
+ * Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -107,7 +107,37 @@ struct rspamd_leaky_bucket_elt {
};
static const guint64 rspamd_fuzzy_storage_magic = 0x291a3253eb1b3ea5ULL;
+
+static int64_t
+fuzzy_kp_hash(const unsigned char *p) /* hash callback passed to KHASH_INIT(rspamd_fuzzy_keys_hash, ...) */
+{
+ int64_t res;
+
+ memcpy(&res, p, sizeof(res)); /* the hash is the first sizeof(res) bytes of the key, copied bytewise rather than read through a pointer cast */
+ return res;
+}
+static bool
+fuzzy_kp_equal(gconstpointer a, gconstpointer b) /* equality callback passed to KHASH_INIT(rspamd_fuzzy_keys_hash, ...) */
+{
+ const guchar *pa = a, *pb = b;
+
+ return (memcmp(pa, pb, RSPAMD_FUZZY_KEYLEN) == 0); /* only the first RSPAMD_FUZZY_KEYLEN bytes of each key are compared */
+}
+
KHASH_SET_INIT_INT(fuzzy_key_ids_set);
+KHASH_INIT(fuzzy_key_flag_stat, int, struct fuzzy_key_stat, 1, kh_int_hash_func,
+ kh_int_hash_equal);
+struct fuzzy_key {
+ struct rspamd_cryptobox_keypair *key; /* keypair; handed to rspamd_keypair_cache_process() and rspamd_keypair_to_ucl() */
+ struct rspamd_cryptobox_pubkey *pk;
+ struct fuzzy_key_stat *stat; /* per-key statistics; the stat dump code checks it for NULL before use */
+ khash_t(fuzzy_key_flag_stat) * flags_stat; /* map: int flag -> struct fuzzy_key_stat */
+ khash_t(fuzzy_key_ids_set) * forbidden_ids; /* set of int ids (KHASH_SET_INIT_INT) */
+};
+
+KHASH_INIT(rspamd_fuzzy_keys_hash,
+ const unsigned char *, struct fuzzy_key *, 1, /* map: pointer to binary key bytes -> struct fuzzy_key * */
+ fuzzy_kp_hash, fuzzy_kp_equal);
struct rspamd_fuzzy_storage_ctx {
guint64 magic;
@@ -141,7 +171,8 @@ struct rspamd_fuzzy_storage_ctx {
/* Local keypair */
struct rspamd_cryptobox_keypair *default_keypair; /* Bad clash, need for parse keypair */
struct fuzzy_key *default_key;
- GHashTable *keys;
+ khash_t(rspamd_fuzzy_keys_hash) * keys;
+
gboolean encrypted_only;
gboolean read_only;
gboolean dedicated_update_worker;
@@ -207,16 +238,6 @@ struct fuzzy_peer_request {
struct fuzzy_peer_cmd cmd;
};
-KHASH_INIT(fuzzy_key_flag_stat, int, struct fuzzy_key_stat, 1, kh_int_hash_func,
- kh_int_hash_equal);
-struct fuzzy_key {
- struct rspamd_cryptobox_keypair *key;
- struct rspamd_cryptobox_pubkey *pk;
- struct fuzzy_key_stat *stat;
- khash_t(fuzzy_key_flag_stat) * flags_stat;
- khash_t(fuzzy_key_ids_set) * forbidden_ids;
-};
-
struct rspamd_updates_cbdata {
GArray *updates_pending;
struct rspamd_fuzzy_storage_ctx *ctx;
@@ -494,6 +515,16 @@ fuzzy_key_dtor(gpointer p)
}
static void
+fuzzy_hash_table_dtor(khash_t(rspamd_fuzzy_keys_hash) * hash) /* registered as a mempool destructor for ctx->keys in init_fuzzy() */
+{
+ struct fuzzy_key *key;
+ kh_foreach_value(hash, key, { /* run fuzzy_key_dtor() on every value stored in the table */
+ fuzzy_key_dtor(key);
+ });
+ kh_destroy(rspamd_fuzzy_keys_hash, hash); /* then destroy the table itself */
+}
+
+static void
fuzzy_count_callback(guint64 count, void *ud)
{
struct rspamd_fuzzy_storage_ctx *ctx = ud;
@@ -1446,7 +1477,7 @@ rspamd_fuzzy_decrypt_command(struct fuzzy_session *s, guchar *buf, gsize buflen)
{
struct rspamd_fuzzy_encrypted_req_hdr hdr;
struct rspamd_cryptobox_pubkey *rk;
- struct fuzzy_key *key;
+ struct fuzzy_key *key = NULL;
if (s->ctx->default_key == NULL) {
msg_warn("received encrypted request when encryption is not enabled");
@@ -1463,16 +1494,18 @@ rspamd_fuzzy_decrypt_command(struct fuzzy_session *s, guchar *buf, gsize buflen)
buflen -= sizeof(hdr);
/* Try to find the desired key */
- key = g_hash_table_lookup(s->ctx->keys, hdr.key_id);
-
- if (key == NULL) {
+ khiter_t k = kh_get(rspamd_fuzzy_keys_hash, s->ctx->keys, hdr.key_id);
+ if (k == kh_end(s->ctx->keys)) {
/* Unknown key, assume default one */
key = s->ctx->default_key;
}
+ else {
+ key = kh_val(s->ctx->keys, k);
+ }
s->key = key;
- /* Now process keypair */
+ /* Now process the remote pubkey */
rk = rspamd_pubkey_from_bin(hdr.pubkey, sizeof(hdr.pubkey),
RSPAMD_KEYPAIR_KEX, RSPAMD_CRYPTOBOX_MODE_25519);
@@ -1482,6 +1515,7 @@ rspamd_fuzzy_decrypt_command(struct fuzzy_session *s, guchar *buf, gsize buflen)
return FALSE;
}
+ /* Try to get the cached NM */
rspamd_keypair_cache_process(s->ctx->keypair_cache, key->key, rk);
/* Now decrypt request */
@@ -2088,64 +2122,65 @@ rspamd_fuzzy_storage_stat_key(const struct fuzzy_key_stat *key_stat)
return res;
}
-static ucl_object_t *
-rspamd_fuzzy_stat_to_ucl(struct rspamd_fuzzy_storage_ctx *ctx, gboolean ip_stat)
+static void
+rspamd_fuzzy_key_stat_iter(const unsigned char *pk_iter, struct fuzzy_key *fuzzy_key, ucl_object_t *keys_obj, gboolean ip_stat)
{
- struct fuzzy_key_stat *key_stat;
- GHashTableIter it;
- struct fuzzy_key *fuzzy_key;
- ucl_object_t *obj, *keys_obj, *elt, *ip_elt, *ip_cur;
- gpointer k, v;
- gint i;
+ struct fuzzy_key_stat *key_stat = fuzzy_key->stat;
gchar keyname[17];
- obj = ucl_object_typed_new(UCL_OBJECT);
+ if (key_stat) {
+ rspamd_snprintf(keyname, sizeof(keyname), "%8bs", pk_iter);
- keys_obj = ucl_object_typed_new(UCL_OBJECT);
- g_hash_table_iter_init(&it, ctx->keys);
+ ucl_object_t *elt = rspamd_fuzzy_storage_stat_key(key_stat);
- while (g_hash_table_iter_next(&it, &k, &v)) {
- fuzzy_key = v;
- key_stat = fuzzy_key->stat;
+ if (key_stat->last_ips && ip_stat) {
+ int i = 0;
+ ucl_object_t *ip_elt = ucl_object_typed_new(UCL_OBJECT);
+ gpointer k, v;
- if (key_stat) {
- rspamd_snprintf(keyname, sizeof(keyname), "%8bs", k);
+ while ((i = rspamd_lru_hash_foreach(key_stat->last_ips,
+ i, &k, &v)) != -1) {
+ ucl_object_t *ip_cur = rspamd_fuzzy_storage_stat_key(v);
+ ucl_object_insert_key(ip_elt, ip_cur,
+ rspamd_inet_address_to_string(k), 0, true);
+ }
+ ucl_object_insert_key(elt, ip_elt, "ips", 0, false);
+ }
- elt = rspamd_fuzzy_storage_stat_key(key_stat);
+ int flag;
+ struct fuzzy_key_stat *flag_stat;
+ ucl_object_t *flags_ucl = ucl_object_typed_new(UCL_OBJECT);
- if (key_stat->last_ips && ip_stat) {
- i = 0;
+ kh_foreach_key_value_ptr(fuzzy_key->flags_stat, flag, flag_stat, {
+ char intbuf[16];
+ rspamd_snprintf(intbuf, sizeof(intbuf), "%d", flag);
+ ucl_object_insert_key(flags_ucl, rspamd_fuzzy_storage_stat_key(flag_stat),
+ intbuf, 0, true);
+ });
- ip_elt = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(elt, flags_ucl, "flags", 0, false);
- while ((i = rspamd_lru_hash_foreach(key_stat->last_ips,
- i, &k, &v)) != -1) {
- ip_cur = rspamd_fuzzy_storage_stat_key(v);
- ucl_object_insert_key(ip_elt, ip_cur,
- rspamd_inet_address_to_string(k), 0, true);
- }
- ucl_object_insert_key(elt, ip_elt, "ips", 0, false);
- }
+ ucl_object_insert_key(elt,
+ rspamd_keypair_to_ucl(fuzzy_key->key, RSPAMD_KEYPAIR_DUMP_NO_SECRET | RSPAMD_KEYPAIR_DUMP_FLATTENED),
+ "keypair", 0, false);
+ ucl_object_insert_key(keys_obj, elt, keyname, 0, true);
+ }
+}
- int flag;
- struct fuzzy_key_stat *flag_stat;
- ucl_object_t *flags_ucl = ucl_object_typed_new(UCL_OBJECT);
+static ucl_object_t *
+rspamd_fuzzy_stat_to_ucl(struct rspamd_fuzzy_storage_ctx *ctx, gboolean ip_stat)
+{
+ struct fuzzy_key *fuzzy_key;
+ ucl_object_t *obj, *keys_obj, *elt, *ip_elt;
+ const unsigned char *pk_iter;
- kh_foreach_key_value_ptr(fuzzy_key->flags_stat, flag, flag_stat, {
- char intbuf[16];
- rspamd_snprintf(intbuf, sizeof(intbuf), "%d", flag);
- ucl_object_insert_key(flags_ucl, rspamd_fuzzy_storage_stat_key(flag_stat),
- intbuf, 0, true);
- });
+ obj = ucl_object_typed_new(UCL_OBJECT);
- ucl_object_insert_key(elt, flags_ucl, "flags", 0, false);
+ keys_obj = ucl_object_typed_new(UCL_OBJECT);
- ucl_object_insert_key(elt,
- rspamd_keypair_to_ucl(fuzzy_key->key, RSPAMD_KEYPAIR_DUMP_NO_SECRET | RSPAMD_KEYPAIR_DUMP_FLATTENED),
- "keypair", 0, false);
- ucl_object_insert_key(keys_obj, elt, keyname, 0, true);
- }
- }
+ kh_foreach(ctx->keys, pk_iter, fuzzy_key, {
+ rspamd_fuzzy_key_stat_iter(pk_iter, fuzzy_key, keys_obj, ip_stat);
+ });
ucl_object_insert_key(obj, keys_obj, "keys", 0, false);
@@ -2172,8 +2207,8 @@ rspamd_fuzzy_stat_to_ucl(struct rspamd_fuzzy_storage_ctx *ctx, gboolean ip_stat)
false);
if (ctx->errors_ips && ip_stat) {
- i = 0;
-
+ gpointer k, v;
+ int i = 0;
ip_elt = ucl_object_typed_new(UCL_OBJECT);
while ((i = rspamd_lru_hash_foreach(ctx->errors_ips, i, &k, &v)) != -1) {
@@ -2192,7 +2227,7 @@ rspamd_fuzzy_stat_to_ucl(struct rspamd_fuzzy_storage_ctx *ctx, gboolean ip_stat)
/* Checked by epoch */
elt = ucl_object_typed_new(UCL_ARRAY);
- for (i = RSPAMD_FUZZY_EPOCH10; i < RSPAMD_FUZZY_EPOCH_MAX; i++) {
+ for (int i = RSPAMD_FUZZY_EPOCH10; i < RSPAMD_FUZZY_EPOCH_MAX; i++) {
ucl_array_append(elt,
ucl_object_fromint(ctx->stat.fuzzy_hashes_checked[i]));
}
@@ -2202,7 +2237,7 @@ rspamd_fuzzy_stat_to_ucl(struct rspamd_fuzzy_storage_ctx *ctx, gboolean ip_stat)
/* Shingles by epoch */
elt = ucl_object_typed_new(UCL_ARRAY);
- for (i = RSPAMD_FUZZY_EPOCH10; i < RSPAMD_FUZZY_EPOCH_MAX; i++) {
+ for (int i = RSPAMD_FUZZY_EPOCH10; i < RSPAMD_FUZZY_EPOCH_MAX; i++) {
ucl_array_append(elt,
ucl_object_fromint(ctx->stat.fuzzy_shingles_checked[i]));
}
@@ -2212,7 +2247,7 @@ rspamd_fuzzy_stat_to_ucl(struct rspamd_fuzzy_storage_ctx *ctx, gboolean ip_stat)
/* Matched by epoch */
elt = ucl_object_typed_new(UCL_ARRAY);
- for (i = RSPAMD_FUZZY_EPOCH10; i < RSPAMD_FUZZY_EPOCH_MAX; i++) {
+ for (int i = RSPAMD_FUZZY_EPOCH10; i < RSPAMD_FUZZY_EPOCH_MAX; i++) {
ucl_array_append(elt,
ucl_object_fromint(ctx->stat.fuzzy_hashes_found[i]));
}
@@ -2617,7 +2652,28 @@ fuzzy_parse_keypair(rspamd_mempool_t *pool,
NULL);
keystat->keypair = rspamd_keypair_ref(kp);
/* We map entries by pubkey in binary form for faster lookup */
- g_hash_table_insert(ctx->keys, (gpointer) pk, key);
+ khiter_t k;
+ int r;
+
+ k = kh_put(rspamd_fuzzy_keys_hash, ctx->keys, pk, &r);
+
+ if (r == 0) {
+ msg_err("duplicate keypair found: pk=%*bs",
+ 32, pk);
+ fuzzy_key_dtor(key);
+
+ return FALSE;
+ }
+ else if (r == -1) {
+ msg_err("hash insertion error: pk=%*bs",
+ 32, pk);
+ fuzzy_key_dtor(key);
+
+ return FALSE;
+ }
+
+ kh_val(ctx->keys, k) = key;
+
ctx->default_key = key;
const ucl_object_t *extensions = rspamd_keypair_get_extensions(kp);
@@ -2651,20 +2707,6 @@ fuzzy_parse_keypair(rspamd_mempool_t *pool,
return TRUE;
}
-static guint
-fuzzy_kp_hash(gconstpointer p)
-{
- return *(guint *) p;
-}
-
-static gboolean
-fuzzy_kp_equal(gconstpointer a, gconstpointer b)
-{
- const guchar *pa = a, *pb = b;
-
- return (memcmp(pa, pb, RSPAMD_FUZZY_KEYLEN) == 0);
-}
-
gpointer
init_fuzzy(struct rspamd_config *cfg)
{
@@ -2682,10 +2724,9 @@ init_fuzzy(struct rspamd_config *cfg)
ctx->lua_pre_handler_cbref = -1;
ctx->lua_post_handler_cbref = -1;
ctx->lua_blacklist_cbref = -1;
- ctx->keys = g_hash_table_new_full(fuzzy_kp_hash, fuzzy_kp_equal,
- NULL, fuzzy_key_dtor);
+ ctx->keys = kh_init(rspamd_fuzzy_keys_hash);
rspamd_mempool_add_destructor(cfg->cfg_pool,
- (rspamd_mempool_destruct_t) g_hash_table_unref, ctx->keys);
+ (rspamd_mempool_destruct_t) fuzzy_hash_table_dtor, ctx->keys);
ctx->errors_ips = rspamd_lru_hash_new_full(1024,
(GDestroyNotify) rspamd_inet_address_free, g_free,
rspamd_inet_address_hash, rspamd_inet_address_equal);