]> source.dussan.org Git - rspamd.git/commitdiff
[Project] Reimplement dependencies processing
authorVsevolod Stakhov <vsevolod@rspamd.com>
Sun, 10 Apr 2022 08:05:56 +0000 (09:05 +0100)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Sun, 10 Apr 2022 08:05:56 +0000 (09:05 +0100)
src/libserver/symcache/symcache_impl.cxx
src/libserver/symcache/symcache_internal.hxx

index a7afae6d9d5f044c98ed48a755bebbc736e33084..11fd7b0e61d89ac9aa2ba0879b12fee5277d8441 100644 (file)
@@ -31,7 +31,6 @@ auto symcache::init() -> bool
                res = load_items();
        }
 
-
        /* Deal with the delayed dependencies */
        for (const auto &delayed_dep : *delayed_deps) {
                auto virt_item = get_item_by_name(delayed_dep.from, false);
@@ -78,45 +77,49 @@ auto symcache::init() -> bool
        }
        delayed_conditions.reset();
 
-       PTR_ARRAY_FOREACH (cache->items_by_id, i, it) {
+       for (auto &it : items_by_id) {
+               it->process_deps(*this);
+       }
 
-               PTR_ARRAY_FOREACH (it->deps, j, dep) {
-                       rspamd_symcache_process_dep(cache, it, dep);
-               }
+       for (auto &it : virtual_symbols) {
+               it->process_deps(*this);
+       }
 
-               if (it->deps) {
-                       /* Reversed loop to make removal safe */
-                       for (j = it->deps->len - 1; j >= 0; j--) {
-                               dep = g_ptr_array_index (it->deps, j);
+       /* Sorting stuff */
+       auto postfilters_cmp = [](const auto &it1, const auto &it2) -> int {
+               if (it1->priority > it2-> priority) {
+                       return 1;
+               }
+               else if (it1->priority == it2->priority) {
+                       return 0;
+               }
 
-                               if (dep->item == NULL) {
-                                       /* Remove useless dep */
-                                       g_ptr_array_remove_index(it->deps, j);
-                               }
-                       }
+               return -1;
+       };
+       auto prefilters_cmp = [](const auto &it1, const auto &it2) -> int {
+               if (it1->priority > it2-> priority) {
+                       return -1;
+               }
+               else if (it1->priority == it2->priority) {
+                       return 0;
                }
-       }
 
-       /* Special case for virtual symbols */
-       PTR_ARRAY_FOREACH (cache->virtual, i, it) {
+               return 1;
+       };
 
-               PTR_ARRAY_FOREACH (it->deps, j, dep) {
-                       rspamd_symcache_process_dep(cache, it, dep);
-               }
-       }
 
-       g_ptr_array_sort_with_data(cache->connfilters, prefilters_cmp, cache);
-       g_ptr_array_sort_with_data(cache->prefilters, prefilters_cmp, cache);
-       g_ptr_array_sort_with_data(cache->postfilters, postfilters_cmp, cache);
-       g_ptr_array_sort_with_data(cache->idempotent, postfilters_cmp, cache);
+       std::stable_sort(std::begin(connfilters), std::end(connfilters), prefilters_cmp);
+       std::stable_sort(std::begin(prefilters), std::end(prefilters), prefilters_cmp);
+       std::stable_sort(std::begin(postfilters), std::end(postfilters), postfilters_cmp);
+       std::stable_sort(std::begin(idempotent), std::end(idempotent), postfilters_cmp);
 
        rspamd_symcache_resort(cache);
 
        /* Connect metric symbols with symcache symbols */
-       if (cache->cfg->symbols) {
-               g_hash_table_foreach(cache->cfg->symbols,
+       if (cfg->symbols) {
+               g_hash_table_foreach(cfg->symbols,
                                rspamd_symcache_metric_connect_cb,
-                               cache);
+                               this);
        }
 
        return res;
@@ -134,7 +137,7 @@ auto symcache::load_items() -> bool
 
 
        if (cached_map->get_size() < (gint) sizeof(symcache_header)) {
-               msg_info_cache("cannot use file %s, truncated: %z", cfg->cache_filename, ,
+               msg_info_cache("cannot use file %s, truncated: %z", cfg->cache_filename,
                                errno, strerror(errno));
                return false;
        }
@@ -373,12 +376,10 @@ auto symcache::add_dependency(int id_from, std::string_view to, int virtual_id_f
        const auto &source = items_by_id[id_from];
        g_assert (source.get() != nullptr);
 
-       source->deps.emplace_back(cache_dependency{
-                       .item = cache_item_ptr{nullptr},
-                       .sym = std::string(to),
-                       .id = id_from,
-                       .vid = -1,
-       });
+       source->deps.emplace_back(cache_item_ptr{nullptr},
+                       std::string(to),
+                       id_from,
+                       -1);
 
 
        if (virtual_id_from >= 0) {
@@ -386,12 +387,10 @@ auto symcache::add_dependency(int id_from, std::string_view to, int virtual_id_f
                /* We need that for settings id propagation */
                const auto &vsource = virtual_symbols[virtual_id_from];
                g_assert (vsource.get() != nullptr);
-               vsource->deps.emplace_back(cache_dependency{
-                               .item = cache_item_ptr{nullptr},
-                               .sym = std::string(to),
-                               .id = -1,
-                               .vid = virtual_id_from,
-               });
+               vsource->deps.emplace_back(cache_item_ptr{nullptr},
+                               std::string(to),
+                               -1,
+                               virtual_id_from);
        }
 }
 
@@ -423,38 +422,36 @@ auto cache_item::process_deps(const symcache &cache) -> void
 
                        if (!vdit) {
                                if (dit) {
-                                       msg_err_cache ("cannot add dependency from %s on %s: no dependency symbol registered",
+                                       msg_err_cache("cannot add dependency from %s on %s: no dependency symbol registered",
                                                        dep.sym.c_str(), dit->symbol.c_str());
                                }
                        }
                        else {
-                               msg_debug_cache ("process virtual dependency %s(%d) on %s(%d)", symbol.c_str(),
+                               msg_debug_cache("process virtual dependency %s(%d) on %s(%d)", symbol.c_str(),
                                                dep.vid, vdit->symbol.c_str(), vdit->id);
 
                                std::size_t nids = 0;
 
-                               msg_debug_cache ("check id propagation for dependency %s from %s",
+                               /* Propagate ids */
+                               msg_debug_cache("check id propagation for dependency %s from %s",
                                                symbol.c_str(), dit->symbol.c_str());
 
                                const auto *ids = dit->allowed_ids.get_ids(nids);
 
-                               /* TODO: merge? */
                                if (nids > 0) {
-                                       msg_info_cache ("propagate allowed ids from %s to %s",
+                                       msg_debug_cache("propagate allowed ids from %s to %s",
                                                        dit->symbol.c_str(), symbol.c_str());
 
-                                       rspamd_symcache_set_allowed_settings_ids (cache, it->symbol, ids,
-                                                       nids);
+                                       allowed_ids.set_ids(ids, nids, cache.get_pool());
                                }
 
-                               ids = rspamd_symcache_get_forbidden_settings_ids (cache, dit->symbol, &nids);
+                               ids = dit->forbidden_ids.get_ids(nids);
 
                                if (nids > 0) {
-                                       msg_info_cache ("propagate forbidden ids from %s to %s",
-                                                       dit->symbol, it->symbol);
+                                       msg_debug_cache("propagate forbidden ids from %s to %s",
+                                                       dit->symbol.c_str(), symbol.c_str());
 
-                                       rspamd_symcache_set_forbidden_settings_ids (cache, it->symbol, ids,
-                                                       nids);
+                                       forbidden_ids.set_ids(ids, nids, cache.get_pool());
                                }
                        }
                }
@@ -524,14 +521,13 @@ auto cache_item::process_deps(const symcache &cache) -> void
                        msg_err_cache ("cannot find dependency on symbol %s for symbol %s",
                                        dep.sym.c_str(), symbol.c_str());
 
-                       return;
-               }
-
-               if (vdit) {
-                       /* Use virtual symbol to propagate deps */
-                       rspamd_symcache_propagate_dep (cache, it, vdit);
+                       continue;
                }
        }
+
+       // Remove empty deps
+       deps.erase(std::remove_if(std::begin(deps), std::end(deps),
+                       [](const auto &dep){ return !dep.item; }), std::end(deps));
 }
 
 auto virtual_item::get_parent(const symcache &cache) const -> const cache_item *
index cb7d206b0be3ec04ed3712d7e79020409c0bce34..a2b852c1924a0395783978e05b4d3c06585f6c4e 100644 (file)
@@ -36,6 +36,8 @@
 #include "cfg_file.h"
 #include "lua/lua_common.h"
 
+#include "symcache_id_list.hxx"
+
 #define msg_err_cache(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
         "symcache", log_tag(), \
         RSPAMD_LOG_FUNC, \
@@ -82,99 +84,6 @@ struct order_generation {
 
 using order_generation_ptr = std::shared_ptr<order_generation>;
 
-/*
- * This structure is optimised to store ids list:
- * - If the first element is -1 then use dynamic part, else use static part
- * There is no std::variant to save space
- */
-struct id_list {
-       union {
-               std::uint32_t st[4];
-               struct {
-                       std::uint32_t e; /* First element */
-                       std::uint16_t len;
-                       std::uint16_t allocated;
-                       std::uint32_t *n;
-               } dyn;
-       } data;
-
-       id_list() {
-               std::memset((void *)&data, 0, sizeof(data));
-       }
-       /**
-        * Returns ids from a compressed list, accepting a mutable reference for number of elements
-        * @param nids output of the number of elements
-        * @return
-        */
-       auto get_ids(std::size_t &nids) const -> const std::uint32_t * {
-               if (data.dyn.e == -1) {
-                       /* Dynamic list */
-                       nids = data.dyn.len;
-
-                       return data.dyn.n;
-               }
-               else {
-                       auto cnt = 0;
-
-                       while (data.st[cnt] != 0 && cnt < G_N_ELEMENTS(data.st)) {
-                               cnt ++;
-                       }
-
-                       nids = cnt;
-
-                       return data.st;
-               }
-       }
-
-       auto add_id(std::uint32_t id, rspamd_mempool_t *pool) -> void {
-               if (data.st[0] == -1) {
-                       /* Dynamic array */
-                       if (data.dyn.len < data.dyn.allocated) {
-                               /* Trivial, append + sort */
-                               data.dyn.n[data.dyn.len++] = id;
-                       }
-                       else {
-                               /* Reallocate */
-                               g_assert (data.dyn.allocated <= G_MAXINT16);
-                               data.dyn.allocated *= 2;
-
-                               auto *new_array = rspamd_mempool_alloc_array_type(pool,
-                                               data.dyn.allocated, std::uint32_t);
-                               memcpy(new_array, data.dyn.n, data.dyn.len * sizeof(std::uint32_t));
-                               data.dyn.n = new_array;
-                               data.dyn.n[data.dyn.len++] = id;
-                       }
-
-                       std::sort(data.dyn.n, data.dyn.n + data.dyn.len);
-               }
-               else {
-                       /* Static part */
-                       auto cnt = 0u;
-                       while (data.st[cnt] != 0 && cnt < G_N_ELEMENTS (data.st)) {
-                               cnt ++;
-                       }
-
-                       if (cnt < G_N_ELEMENTS (data.st)) {
-                               data.st[cnt] = id;
-                       }
-                       else {
-                               /* Switch to dynamic */
-                               data.dyn.allocated = G_N_ELEMENTS (data.st) * 2;
-                               auto *new_array = rspamd_mempool_alloc_array_type(pool,
-                                               data.dyn.allocated, std::uint32_t);
-                               memcpy (new_array, data.st, sizeof(data.st));
-                               data.dyn.n = new_array;
-                               data.dyn.e = -1; /* Marker */
-                               data.dyn.len = G_N_ELEMENTS (data.st);
-
-                               /* Recursively jump to dynamic branch that will handle insertion + sorting */
-                               add_id(id, pool); // tail call
-                       }
-               }
-       }
-
-
-};
 
 class symcache;
 
@@ -264,7 +173,7 @@ struct cache_item : std::enable_shared_from_this<cache_item> {
        /* Dependencies */
        std::vector<cache_dependency> deps;
        /* Reverse dependencies */
-       std::vector<cache_item_ptr> rdeps;
+       std::vector<cache_dependency> rdeps;
 
 public:
        [[nodiscard]] static auto create() -> cache_item_ptr {
@@ -426,6 +335,10 @@ public:
        auto log_tag() const -> const char* {
                return cfg->checksum;
        }
+
+       auto get_pool() const {
+               return static_pool;
+       }
 };
 
 /*