From 309ae8e66a97d65804bce0e75efb2769ceb7a4ee Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 31 Mar 2022 21:55:51 +0100 Subject: [PATCH] [Project] Start rewrite symcache in c++ --- src/libserver/CMakeLists.txt | 2 +- ...{rspamd_symcache.c => rspamd_symcache.cxx} | 215 +++++++++++------- src/libserver/rspamd_symcache.h | 3 + 3 files changed, 132 insertions(+), 88 deletions(-) rename src/libserver/{rspamd_symcache.c => rspamd_symcache.cxx} (97%) diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt index a4fdbbfcb..7371e8ade 100644 --- a/src/libserver/CMakeLists.txt +++ b/src/libserver/CMakeLists.txt @@ -20,7 +20,7 @@ SET(LIBRSPAMDSERVERSRC ${CMAKE_CURRENT_SOURCE_DIR}/roll_history.c ${CMAKE_CURRENT_SOURCE_DIR}/spf.c ${CMAKE_CURRENT_SOURCE_DIR}/ssl_util.c - ${CMAKE_CURRENT_SOURCE_DIR}/rspamd_symcache.c + ${CMAKE_CURRENT_SOURCE_DIR}/rspamd_symcache.cxx ${CMAKE_CURRENT_SOURCE_DIR}/task.c ${CMAKE_CURRENT_SOURCE_DIR}/url.c ${CMAKE_CURRENT_SOURCE_DIR}/worker_util.c diff --git a/src/libserver/rspamd_symcache.c b/src/libserver/rspamd_symcache.cxx similarity index 97% rename from src/libserver/rspamd_symcache.c rename to src/libserver/rspamd_symcache.cxx index d2989d213..a1aa8c504 100644 --- a/src/libserver/rspamd_symcache.c +++ b/src/libserver/rspamd_symcache.cxx @@ -23,13 +23,17 @@ #include "unix-std.h" #include "contrib/t1ha/t1ha.h" #include "libserver/worker_util.h" -#include "khash.h" -#include "utlist.h" -#include -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -# include -#endif +#include +#include +#include +#include +#include +#include +#include +#include "libutil/cxx/local_shared_ptr.hxx" + +#include "contrib/robin-hood/robin_hood.h" #define msg_err_cache(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ cache->static_pool->tag.tagname, cache->cfg->checksum, \ @@ -67,26 +71,35 @@ INIT_LOG_MODULE(symcache) (dyn_item)->finished = 1 #define CLR_FINISH_BIT(checkpoint, dyn_item) \ (dyn_item)->finished = 0 -static const guchar rspamd_symcache_magic[8] = {'r', 's', 'c', 2, 0, 0, 0, 0 }; + +namespace rspamd::symcache { + +static const std::uint8_t rspamd_symcache_magic[8] = {'r', 's', 'c', 2, 0, 0, 0, 0}; struct rspamd_symcache_header { - guchar magic[8]; - guint nitems; - guchar checksum[64]; - guchar unused[128]; + std::uint8_t magic[8]; + unsigned int nitems; + std::uint8_t checksum[64]; + std::uint8_t unused[128]; }; -struct symcache_order { - GPtrArray *d; - guint id; - ref_entry_t ref; +struct cache_item; +using cache_item_ptr = rspamd::local_shared_ptr; +using cache_item_weak_ptr = rspamd::local_weak_ptr; + +struct order_generation { + std::vector d; + unsigned int generation_id; }; +using order_generation_ptr = rspamd::local_shared_ptr; + /* * This structure is optimised to store ids list: * - If the first element is -1 then use dynamic part, else use static part + * There is no std::variant to save space */ -struct rspamd_symcache_id_list { +struct id_list { union { guint32 st[4]; struct { @@ -98,97 +111,135 @@ struct rspamd_symcache_id_list { }; }; -struct rspamd_symcache_condition { +struct item_condition { +private: gint cb; - struct rspamd_symcache_condition *prev, *next; + lua_State *L; +public: + item_condition() { + // TODO + } + virtual ~item_condition() { + // TODO + } }; -struct rspamd_symcache_item { +class normal_item { +private: + symbol_func_t func; + void *user_data; + std::vector conditions; +public: + explicit normal_item() { + // TODO + } + auto add_condition() -> void { + // TODO + } + auto call() -> void { + // TODO + } +}; + +class virtual_item { +private: + int parent_id; + cache_item_ptr parent; +public: + explicit virtual_item() { + // TODO + } +}; + +struct cache_item { /* This block is likely shared */ struct rspamd_symcache_item_stat *st; - - guint64 last_count; struct rspamd_counter_data *cd; - gchar *symbol; - const gchar *type_descr; - gint type; + + std::uint64_t last_count; + std::string symbol; + std::string_view type_descr; + int type; /* Callback data */ - union { - struct { - symbol_func_t func; - gpointer user_data; - struct rspamd_symcache_condition *conditions; - } normal; - struct { - gint parent; - struct rspamd_symcache_item *parent_item; - } virtual; - } specific; + std::variant specific; /* Condition of execution */ - gboolean enabled; - /* Used for async stuff checks */ - gboolean is_filter; - gboolean is_virtual; + bool enabled; /* Priority */ - gint priority; + int priority; /* Topological order */ - guint order; - gint id; - gint frequency_peaks; + unsigned int order; + /* Unique id - counter */ + int id; + + int frequency_peaks; /* Settings ids */ - struct rspamd_symcache_id_list allowed_ids; + id_list allowed_ids; /* Allows execution but not symbols insertion */ - struct rspamd_symcache_id_list exec_only_ids; - struct rspamd_symcache_id_list forbidden_ids; + id_list exec_only_ids; + id_list forbidden_ids; /* Dependencies */ - GPtrArray *deps; - GPtrArray *rdeps; + std::vector deps; + /* Reverse dependencies */ + std::vector rdeps; +}; - /* Container */ - GPtrArray *container; +struct delayed_cache_dependency { + std::string from; + std::string to; +}; + +struct delayed_cache_condition { + std::string sym; + int cbref; + lua_State *L; }; struct rspamd_symcache { - /* Hash table for fast access */ - GHashTable *items_by_symbol; - GPtrArray *items_by_id; - struct symcache_order *items_by_order; - GPtrArray *connfilters; - GPtrArray *prefilters; - GPtrArray *filters; - GPtrArray *postfilters; - GPtrArray *composites; - GPtrArray *idempotent; - GPtrArray *virtual; - GList *delayed_deps; - GList *delayed_conditions; + /* Map indexed by symbol name: all symbols must have unique names, so this map holds ownership */ + robin_hood::unordered_flat_map items_by_symbol; + std::vector items_by_id; + + /* Items sorted into some order */ + order_generation_ptr items_by_order; + unsigned int cur_order_gen; + + std::vector connfilters; + std::vector prefilters; + std::vector filters; + std::vector postfilters; + std::vector composites; + std::vector idempotent; + std::vector virtual_symbols; + + std::vector delayed_deps; + std::vector delayed_conditions; + rspamd_mempool_t *static_pool; - guint64 cksum; - gdouble total_weight; - guint used_items; - guint stats_symbols_count; - guint64 total_hits; - guint id; + std::uint64_t cksum; + double total_weight; + std::size_t used_items; + std::size_t stats_symbols_count; + std::uint64_t total_hits; + struct rspamd_config *cfg; - gdouble reload_time; - gdouble last_profile; - gint peak_cb; + double reload_time; + double last_profile; + int peak_cb; }; -struct rspamd_symcache_dynamic_item { +struct cache_dynamic_item { guint16 start_msec; /* Relative to task time */ - unsigned started:1; - unsigned finished:1; + unsigned started: 1; + unsigned finished: 1; /* unsigned pad:14; */ guint32 async_events; }; - struct cache_dependency { struct rspamd_symcache_item *item; /* Real dependency */ gchar *sym; /* Symbolic dep name */ @@ -196,17 +247,6 @@ struct cache_dependency { gint vid; /* Virtual from */ }; -struct delayed_cache_dependency { - gchar *from; - gchar *to; -}; - -struct delayed_cache_condition { - gchar *sym; - gint cbref; - lua_State *L; -}; - struct cache_savepoint { guint version; guint items_inflight; @@ -229,6 +269,7 @@ struct rspamd_cache_refresh_cbdata { struct rspamd_worker *w; struct ev_loop *event_loop; }; +} // namespace rspamd /* At least once per minute */ #define PROFILE_MAX_TIME (60.0) diff --git a/src/libserver/rspamd_symcache.h b/src/libserver/rspamd_symcache.h index 1d670db04..303544d7b 100644 --- a/src/libserver/rspamd_symcache.h +++ b/src/libserver/rspamd_symcache.h @@ -69,6 +69,9 @@ struct rspamd_abstract_callback_data { char data[]; }; +/** + * Shared memory block specific for each symbol + */ struct rspamd_symcache_item_stat { struct rspamd_counter_data time_counter; gdouble avg_time; -- 2.39.5