From 1ce6e9b3c7d30e9fef3840ae70eb20859f46d7ff Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sun, 10 Apr 2022 09:05:56 +0100 Subject: [PATCH] [Project] Reimplement dependencies processing --- src/libserver/symcache/symcache_impl.cxx | 114 +++++++++---------- src/libserver/symcache/symcache_internal.hxx | 101 ++-------------- 2 files changed, 62 insertions(+), 153 deletions(-) diff --git a/src/libserver/symcache/symcache_impl.cxx b/src/libserver/symcache/symcache_impl.cxx index a7afae6d9..11fd7b0e6 100644 --- a/src/libserver/symcache/symcache_impl.cxx +++ b/src/libserver/symcache/symcache_impl.cxx @@ -31,7 +31,6 @@ auto symcache::init() -> bool res = load_items(); } - /* Deal with the delayed dependencies */ for (const auto &delayed_dep : *delayed_deps) { auto virt_item = get_item_by_name(delayed_dep.from, false); @@ -78,45 +77,49 @@ auto symcache::init() -> bool } delayed_conditions.reset(); - PTR_ARRAY_FOREACH (cache->items_by_id, i, it) { + for (auto &it : items_by_id) { + it->process_deps(*this); + } - PTR_ARRAY_FOREACH (it->deps, j, dep) { - rspamd_symcache_process_dep(cache, it, dep); - } + for (auto &it : virtual_symbols) { + it->process_deps(*this); + } - if (it->deps) { - /* Reversed loop to make removal safe */ - for (j = it->deps->len - 1; j >= 0; j--) { - dep = g_ptr_array_index (it->deps, j); + /* Sorting stuff */ + auto postfilters_cmp = [](const auto &it1, const auto &it2) -> int { + if (it1->priority > it2-> priority) { + return 1; + } + else if (it1->priority == it2->priority) { + return 0; + } - if (dep->item == NULL) { - /* Remove useless dep */ - g_ptr_array_remove_index(it->deps, j); - } - } + return -1; + }; + auto prefilters_cmp = [](const auto &it1, const auto &it2) -> int { + if (it1->priority > it2-> priority) { + return -1; + } + else if (it1->priority == it2->priority) { + return 0; } - } - /* Special case for virtual symbols */ - PTR_ARRAY_FOREACH (cache->virtual, i, it) { + return 1; + }; - PTR_ARRAY_FOREACH (it->deps, j, dep) { - rspamd_symcache_process_dep(cache, it, dep); - } - } - g_ptr_array_sort_with_data(cache->connfilters, prefilters_cmp, cache); - g_ptr_array_sort_with_data(cache->prefilters, prefilters_cmp, cache); - g_ptr_array_sort_with_data(cache->postfilters, postfilters_cmp, cache); - g_ptr_array_sort_with_data(cache->idempotent, postfilters_cmp, cache); + std::stable_sort(std::begin(connfilters), std::end(connfilters), prefilters_cmp); + std::stable_sort(std::begin(prefilters), std::end(prefilters), prefilters_cmp); + std::stable_sort(std::begin(postfilters), std::end(postfilters), postfilters_cmp); + std::stable_sort(std::begin(idempotent), std::end(idempotent), postfilters_cmp); rspamd_symcache_resort(cache); /* Connect metric symbols with symcache symbols */ - if (cache->cfg->symbols) { - g_hash_table_foreach(cache->cfg->symbols, + if (cfg->symbols) { + g_hash_table_foreach(cfg->symbols, rspamd_symcache_metric_connect_cb, - cache); + this); } return res; @@ -134,7 +137,7 @@ auto symcache::load_items() -> bool if (cached_map->get_size() < (gint) sizeof(symcache_header)) { - msg_info_cache("cannot use file %s, truncated: %z", cfg->cache_filename, , + msg_info_cache("cannot use file %s, truncated: %z", cfg->cache_filename, errno, strerror(errno)); return false; } @@ -373,12 +376,10 @@ auto symcache::add_dependency(int id_from, std::string_view to, int virtual_id_f const auto &source = items_by_id[id_from]; g_assert (source.get() != nullptr); - source->deps.emplace_back(cache_dependency{ - .item = cache_item_ptr{nullptr}, - .sym = std::string(to), - .id = id_from, - .vid = -1, - }); + source->deps.emplace_back(cache_item_ptr{nullptr}, + std::string(to), + id_from, + -1); if (virtual_id_from >= 0) { @@ -386,12 +387,10 @@ auto symcache::add_dependency(int id_from, std::string_view to, int virtual_id_f /* We need that for settings id propagation */ const auto &vsource = virtual_symbols[virtual_id_from]; g_assert (vsource.get() != nullptr); - vsource->deps.emplace_back(cache_dependency{ - .item = cache_item_ptr{nullptr}, - .sym = std::string(to), - .id = -1, - .vid = virtual_id_from, - }); + vsource->deps.emplace_back(cache_item_ptr{nullptr}, + std::string(to), + -1, + virtual_id_from); } } @@ -423,38 +422,36 @@ auto cache_item::process_deps(const symcache &cache) -> void if (!vdit) { if (dit) { - msg_err_cache ("cannot add dependency from %s on %s: no dependency symbol registered", + msg_err_cache("cannot add dependency from %s on %s: no dependency symbol registered", dep.sym.c_str(), dit->symbol.c_str()); } } else { - msg_debug_cache ("process virtual dependency %s(%d) on %s(%d)", symbol.c_str(), + msg_debug_cache("process virtual dependency %s(%d) on %s(%d)", symbol.c_str(), dep.vid, vdit->symbol.c_str(), vdit->id); std::size_t nids = 0; - msg_debug_cache ("check id propagation for dependency %s from %s", + /* Propagate ids */ + msg_debug_cache("check id propagation for dependency %s from %s", symbol.c_str(), dit->symbol.c_str()); const auto *ids = dit->allowed_ids.get_ids(nids); - /* TODO: merge? */ if (nids > 0) { - msg_info_cache ("propagate allowed ids from %s to %s", + msg_debug_cache("propagate allowed ids from %s to %s", dit->symbol.c_str(), symbol.c_str()); - rspamd_symcache_set_allowed_settings_ids (cache, it->symbol, ids, - nids); + allowed_ids.set_ids(ids, nids, cache.get_pool()); } - ids = rspamd_symcache_get_forbidden_settings_ids (cache, dit->symbol, &nids); + ids = dit->forbidden_ids.get_ids(nids); if (nids > 0) { - msg_info_cache ("propagate forbidden ids from %s to %s", - dit->symbol, it->symbol); + msg_debug_cache("propagate forbidden ids from %s to %s", + dit->symbol.c_str(), symbol.c_str()); - rspamd_symcache_set_forbidden_settings_ids (cache, it->symbol, ids, - nids); + forbidden_ids.set_ids(ids, nids, cache.get_pool()); } } } @@ -524,14 +521,13 @@ auto cache_item::process_deps(const symcache &cache) -> void msg_err_cache ("cannot find dependency on symbol %s for symbol %s", dep.sym.c_str(), symbol.c_str()); - return; - } - - if (vdit) { - /* Use virtual symbol to propagate deps */ - rspamd_symcache_propagate_dep (cache, it, vdit); + continue; } } + + // Remove empty deps + deps.erase(std::remove_if(std::begin(deps), std::end(deps), + [](const auto &dep){ return !dep.item; }), std::end(deps)); } auto virtual_item::get_parent(const symcache &cache) const -> const cache_item * diff --git a/src/libserver/symcache/symcache_internal.hxx b/src/libserver/symcache/symcache_internal.hxx index cb7d206b0..a2b852c19 100644 --- a/src/libserver/symcache/symcache_internal.hxx +++ b/src/libserver/symcache/symcache_internal.hxx @@ -36,6 +36,8 @@ #include "cfg_file.h" #include "lua/lua_common.h" +#include "symcache_id_list.hxx" + #define msg_err_cache(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ "symcache", log_tag(), \ RSPAMD_LOG_FUNC, \ @@ -82,99 +84,6 @@ struct order_generation { using order_generation_ptr = std::shared_ptr; -/* - * This structure is optimised to store ids list: - * - If the first element is -1 then use dynamic part, else use static part - * There is no std::variant to save space - */ -struct id_list { - union { - std::uint32_t st[4]; - struct { - std::uint32_t e; /* First element */ - std::uint16_t len; - std::uint16_t allocated; - std::uint32_t *n; - } dyn; - } data; - - id_list() { - std::memset((void *)&data, 0, sizeof(data)); - } - /** - * Returns ids from a compressed list, accepting a mutable reference for number of elements - * @param nids output of the number of elements - * @return - */ - auto get_ids(std::size_t &nids) const -> const std::uint32_t * { - if (data.dyn.e == -1) { - /* Dynamic list */ - nids = data.dyn.len; - - return data.dyn.n; - } - else { - auto cnt = 0; - - while (data.st[cnt] != 0 && cnt < G_N_ELEMENTS(data.st)) { - cnt ++; - } - - nids = cnt; - - return data.st; - } - } - - auto add_id(std::uint32_t id, rspamd_mempool_t *pool) -> void { - if (data.st[0] == -1) { - /* Dynamic array */ - if (data.dyn.len < data.dyn.allocated) { - /* Trivial, append + sort */ - data.dyn.n[data.dyn.len++] = id; - } - else { - /* Reallocate */ - g_assert (data.dyn.allocated <= G_MAXINT16); - data.dyn.allocated *= 2; - - auto *new_array = rspamd_mempool_alloc_array_type(pool, - data.dyn.allocated, std::uint32_t); - memcpy(new_array, data.dyn.n, data.dyn.len * sizeof(std::uint32_t)); - data.dyn.n = new_array; - data.dyn.n[data.dyn.len++] = id; - } - - std::sort(data.dyn.n, data.dyn.n + data.dyn.len); - } - else { - /* Static part */ - auto cnt = 0u; - while (data.st[cnt] != 0 && cnt < G_N_ELEMENTS (data.st)) { - cnt ++; - } - - if (cnt < G_N_ELEMENTS (data.st)) { - data.st[cnt] = id; - } - else { - /* Switch to dynamic */ - data.dyn.allocated = G_N_ELEMENTS (data.st) * 2; - auto *new_array = rspamd_mempool_alloc_array_type(pool, - data.dyn.allocated, std::uint32_t); - memcpy (new_array, data.st, sizeof(data.st)); - data.dyn.n = new_array; - data.dyn.e = -1; /* Marker */ - data.dyn.len = G_N_ELEMENTS (data.st); - - /* Recursively jump to dynamic branch that will handle insertion + sorting */ - add_id(id, pool); // tail call - } - } - } - - -}; class symcache; @@ -264,7 +173,7 @@ struct cache_item : std::enable_shared_from_this { /* Dependencies */ std::vector deps; /* Reverse dependencies */ - std::vector rdeps; + std::vector rdeps; public: [[nodiscard]] static auto create() -> cache_item_ptr { @@ -426,6 +335,10 @@ public: auto log_tag() const -> const char* { return cfg->checksum; } + + auto get_pool() const { + return static_pool; + } }; /* -- 2.39.5