]> source.dussan.org Git - rspamd.git/commitdiff
[Rework] Use a more sane data structure and refactor
authorVsevolod Stakhov <vsevolod@rspamd.com>
Tue, 21 Nov 2023 15:08:08 +0000 (15:08 +0000)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Tue, 21 Nov 2023 15:08:08 +0000 (15:08 +0000)
src/controller.c
src/fuzzy_storage.c
src/libserver/fuzzy_wire.h
src/libserver/mempool_vars_internal.h
src/plugins/fuzzy_check.c

index 13e6794afc2657cd147c4f6484683174c3b1fb10..eb58db2112a6e9c36d06c6c910e83b263f32bd59 100644 (file)
@@ -33,6 +33,7 @@
 #include "unix-std.h"
 #include "utlist.h"
 #include "libmime/lang_detection.h"
+#include "mempool_vars_internal.h"
 #include <math.h>
 
 /* 60 seconds for worker's IO */
@@ -2602,14 +2603,15 @@ rspamd_controller_stat_fin_task(void *ud)
                ucl_object_insert_key(top, cbdata->stat, "statfiles", 0, false);
        }
 
-       GList *fuzzy_elts = rspamd_mempool_get_variable(cbdata->task->task_pool, "fuzzy_stat");
+       GHashTable *fuzzy_elts = rspamd_mempool_get_variable(cbdata->task->task_pool, RSPAMD_MEMPOOL_FUZZY_STAT);
 
        if (fuzzy_elts) {
                ar = ucl_object_typed_new(UCL_OBJECT);
 
-               for (GList *cur = fuzzy_elts; cur != NULL; cur = g_list_next(cur)) {
-                       entry = cur->data;
+               GHashTableIter it;
 
+               g_hash_table_iter_init(&it, fuzzy_elts);
+               while (g_hash_table_iter_next(&it, NULL, (gpointer *) &entry)) {
                        if (entry->name) {
                                ucl_object_insert_key(ar, ucl_object_fromint(entry->fuzzy_cnt),
                                                                          entry->name, 0, true);
@@ -3053,14 +3055,16 @@ rspamd_controller_metrics_fin_task(void *ud)
                rspamd_fstring_free(users);
        }
 
-       GList *fuzzy_elts = rspamd_mempool_get_variable(cbdata->task->task_pool, "fuzzy_stat");
+       GHashTable *fuzzy_elts = rspamd_mempool_get_variable(cbdata->task->task_pool, RSPAMD_MEMPOOL_FUZZY_STAT);
 
        if (fuzzy_elts) {
                rspamd_printf_fstring(&output, "# HELP rspamd_fuzzy_stat Fuzzy stat labelled by storage.\n");
                rspamd_printf_fstring(&output, "# TYPE rspamd_fuzzy_stat gauge\n");
-               for (GList *cur = fuzzy_elts; cur != NULL; cur = g_list_next(cur)) {
-                       entry = cur->data;
 
+               GHashTableIter it;
+
+               g_hash_table_iter_init(&it, fuzzy_elts);
+               while (g_hash_table_iter_next(&it, NULL, (gpointer *) &entry)) {
                        if (entry->name) {
                                rspamd_printf_fstring(&output, "rspamd_fuzzy_stat{storage=\"%s\"} %ud\n",
                                                                          entry->name, entry->fuzzy_cnt);
index 99d2ef1a1d12373619330b84a712ffe86aecd0bd..569889660e28534a534bd52a4b8ad749fac88ba4 100644 (file)
@@ -1335,7 +1335,7 @@ rspamd_fuzzy_process_command(struct fuzzy_session *session)
                result.v1.prob = 1.0f;
                /* Store high qword in value and low qword in flag */
                result.v1.value = (gint32) ((guint64) session->ctx->stat.fuzzy_hashes >> 32);
-               result.v1.flag = session->ctx->stat.fuzzy_hashes & G_MAXUINT32;
+               result.v1.flag = (guint32) (session->ctx->stat.fuzzy_hashes & G_MAXUINT32);
                rspamd_fuzzy_make_reply(cmd, &result, session, send_flags);
        }
        else if (cmd->cmd == FUZZY_PING) {
index 989a31eb4ddbe4e81db09fbf950808a0a1869eec..c2f93b8dc77b9ca4cfd086f1fcebee0cfd4659a4 100644 (file)
@@ -135,7 +135,7 @@ struct rspamd_fuzzy_cmd_extension {
 
 struct rspamd_fuzzy_stat_entry {
        const gchar *name;
-       guint32 fuzzy_cnt;
+       guint64 fuzzy_cnt;
 };
 
 RSPAMD_PACKED(fuzzy_peer_cmd)
index 72cf1b0955562f6af6b261167d50d7d3d87d819e..6c9553868af270c68a5aca60f4fc33bd06b09623 100644 (file)
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
+/*
+ * Copyright 2023 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *   http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -42,5 +42,6 @@
 #define RSPAMD_MEMPOOL_HAM_LEARNS "ham_learns"
 #define RSPAMD_MEMPOOL_RE_MAPS_CACHE "re_maps_cache"
 #define RSPAMD_MEMPOOL_HTTP_STAT_BACKEND_RUNTIME "stat_http_runtime"
+#define RSPAMD_MEMPOOL_FUZZY_STAT "fuzzy_stat"
 
 #endif
index ed85d793300499a48eb6295204fb760088268823..85db83d08816e310ddcf7f342733f768081d4fd9 100644 (file)
@@ -2448,24 +2448,36 @@ fuzzy_check_try_read(struct fuzzy_client_session *session)
                                        fuzzy_insert_result(session, rep, cmd, io, rep->v1.flag);
                                }
                                else if (cmd->cmd == FUZZY_STAT) {
-                                       /* Just set pool variable to extract it in further */
+                                       /*
+                                        * We store fuzzy stat in the following way:
+                                        * 1) Per-rule counters live in a hash table that maps a rule name to its rspamd_fuzzy_stat_entry
+                                        * 2) That hash table is stored in the task pool variable `fuzzy_stat`
+                                        */
                                        struct rspamd_fuzzy_stat_entry *pval;
-                                       GList *res;
+                                       GHashTable *stats_hash;
 
-                                       pval = rspamd_mempool_alloc(task->task_pool, sizeof(*pval));
-                                       pval->fuzzy_cnt = rep->v1.flag;
-                                       pval->name = session->rule->name;
+                                       stats_hash = (GHashTable *) rspamd_mempool_get_variable(task->task_pool,
+                                                                                                                                                       RSPAMD_MEMPOOL_FUZZY_STAT);
 
-                                       res = rspamd_mempool_get_variable(task->task_pool, "fuzzy_stat");
-
-                                       if (res == NULL) {
-                                               res = g_list_append(NULL, pval);
-                                               rspamd_mempool_set_variable(task->task_pool, "fuzzy_stat",
-                                                                                                       res, (rspamd_mempool_destruct_t) g_list_free);
+                                       if (stats_hash == NULL) {
+                                               stats_hash = g_hash_table_new(rspamd_str_hash, rspamd_str_equal);
+                                               rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_FUZZY_STAT,
+                                                                                                       stats_hash,
+                                                                                                       (rspamd_mempool_destruct_t) g_hash_table_destroy);
                                        }
-                                       else {
-                                               res = g_list_append(res, pval);
+
+                                       pval = g_hash_table_lookup(stats_hash, session->rule->name);
+
+                                       if (pval == NULL) {
+                                               pval = rspamd_mempool_alloc(task->task_pool,
+                                                                                                       sizeof(*pval));
+                                               pval->name = rspamd_mempool_strdup(task->task_pool,
+                                                                                                                  session->rule->name);
+                                               /* Safe, as pval->name is owned by the pool */
+                                               g_hash_table_insert(stats_hash, (char *) pval->name, pval);
                                        }
+
+                                       pval->fuzzy_cnt = (((guint64) rep->v1.value) << 32) + rep->v1.flag;
                                }
                        }
                        else if (rep->v1.value == 403) {