]> source.dussan.org Git - rspamd.git/commitdiff
[Rework] Reimplement saving/loading the cache items
author Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 3 Apr 2022 11:36:55 +0000 (12:36 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 3 Apr 2022 11:36:55 +0000 (12:36 +0100)
src/libserver/symcache/symcache_impl.cxx
src/libserver/symcache/symcache_internal.hxx

index aaf8b0cdd4135dc0eab8f8f808729f4e60deb335..7836c80b93edfcc92084416e96446239eca241dc 100644 (file)
@@ -142,7 +142,7 @@ auto symcache::load_items() -> bool
                return false;
        }
 
-       const auto *hdr = (struct symcache_header *)cached_map->get_map();
+       const auto *hdr = (struct symcache_header *) cached_map->get_map();
 
        if (memcmp(hdr->magic, symcache_magic,
                        sizeof(symcache_magic)) != 0) {
@@ -152,7 +152,7 @@ auto symcache::load_items() -> bool
        }
 
        auto *parser = ucl_parser_new(0);
-       const auto *p = (const std::uint8_t *)(hdr + 1);
+       const auto *p = (const std::uint8_t *) (hdr + 1);
 
        if (!ucl_parser_add_chunk(parser, p, cached_map->get_size() - sizeof(*hdr))) {
                msg_info_cache ("cannot use file %s, cannot parse: %s", cfg->cache_filename,
@@ -222,15 +222,13 @@ auto symcache::load_items() -> bool
                        }
 
                        if (item->is_virtual() && !(item->type & SYMBOL_TYPE_GHOST)) {
-                               g_assert (item->specific.virtual.parent < (gint)cache->items_by_id->len);
-                               parent = g_ptr_array_index (cache->items_by_id,
-                                               item->specific.virtual.parent);
-                               item->specific.virtual.parent_item = parent;
+                               const auto &parent = item->get_parent(*this);
 
-                               if (parent->st->weight < item->st->weight) {
-                                       parent->st->weight = item->st->weight;
+                               if (parent) {
+                                       if (parent->st->weight < item->st->weight) {
+                                               parent->st->weight = item->st->weight;
+                                       }
                                }
-
                                /*
                                 * We maintain avg_time for virtual symbols equal to the
                                 * parent item avg_time
@@ -238,8 +236,8 @@ auto symcache::load_items() -> bool
                                item->st->avg_time = parent->st->avg_time;
                        }
 
-                       cache->total_weight += fabs(item->st->weight);
-                       cache->total_hits += item->st->total_hits;
+                       total_weight += fabs(item->st->weight);
+                       total_hits += item->st->total_hits;
                }
        }
 
@@ -249,41 +247,113 @@ auto symcache::load_items() -> bool
        return true;
 }
 
-auto symcache::get_item_by_id(int id, bool resolve_parent) const -> const cache_item_ptr &
+/**
+ * Truncates a value (towards negative infinity) to two decimal places,
+ * e.g. 1.2345 -> 1.23.
+ *
+ * The scaling has to happen inside floor(): flooring first drops the whole
+ * fractional part and turns the subsequent `* 100.0 / 100.0` into a no-op.
+ *
+ * @param x value to truncate
+ * @return x truncated to hundredths, as a double
+ */
+template<typename T>
+static constexpr auto round_to_hundreds(T x)
+{
+       return ::floor(x * 100.0) / 100.0;
+}
+
+/**
+ * Serialises per-symbol statistics into the cache file named by
+ * cfg->cache_filename.
+ *
+ * What is written: a zero-initialised symcache_header carrying symcache_magic,
+ * followed by one compact JSON object keyed by symbol name. Every entry holds
+ * "weight", "time", "count" and a nested "frequency" object with "avg" and
+ * "stddev".
+ *
+ * @return true when the JSON body has been emitted and flushed; false when the
+ *         sink cannot be created or any write/flush step fails
+ */
+auto symcache::save_items() const -> bool
+{
+       auto file_sink = util::raii_file_sink::create(cfg->cache_filename,
+                       O_WRONLY | O_TRUNC, 00644);
+
+       if (!file_sink.has_value()) {
+               if (errno == EEXIST) {
+                       /* Some other process is already writing data, give up silently */
+                       return false;
+               }
+
+               msg_err_cache("%s", file_sink.error().c_str());
+
+               return false;
+       }
+
+       struct symcache_header hdr;
+       memset(&hdr, 0, sizeof(hdr));
+       memcpy(hdr.magic, symcache_magic, sizeof(symcache_magic));
+
+       if (write(file_sink->get_fd(), &hdr, sizeof(hdr)) == -1) {
+               msg_err_cache("cannot write to file %s, error %d, %s", cfg->cache_filename,
+                               errno, strerror(errno));
+
+               return false;
+       }
+
+       /*
+        * fdopen() can fail; check it before building the UCL tree so this
+        * error path has nothing to release.
+        * NOTE(review): fclose() below also closes the descriptor returned by
+        * file_sink->get_fd() - confirm that raii_file_sink does not close the
+        * same descriptor again when it goes out of scope.
+        */
+       auto *fp = fdopen(file_sink->get_fd(), "a");
+
+       if (fp == nullptr) {
+               msg_err_cache("cannot write to file %s, error %d, %s", cfg->cache_filename,
+                               errno, strerror(errno));
+
+               return false;
+       }
+
+       auto *top = ucl_object_typed_new(UCL_OBJECT);
+
+       for (const auto &it : items_by_symbol) {
+               /* Bind by reference: there is no need to copy the item handle */
+               const auto &item = it.second;
+               auto *elt = ucl_object_typed_new(UCL_OBJECT);
+               ucl_object_insert_key(elt,
+                               ucl_object_fromdouble(round_to_hundreds(item->st->weight)),
+                               "weight", 0, false);
+               ucl_object_insert_key(elt,
+                               ucl_object_fromdouble(round_to_hundreds(item->st->time_counter.mean)),
+                               "time", 0, false);
+               ucl_object_insert_key(elt, ucl_object_fromint(item->st->total_hits),
+                               "count", 0, false);
+
+               auto *freq = ucl_object_typed_new(UCL_OBJECT);
+               ucl_object_insert_key(freq,
+                               ucl_object_fromdouble(round_to_hundreds(item->st->frequency_counter.mean)),
+                               "avg", 0, false);
+               ucl_object_insert_key(freq,
+                               ucl_object_fromdouble(round_to_hundreds(item->st->frequency_counter.stddev)),
+                               "stddev", 0, false);
+               ucl_object_insert_key(elt, freq, "frequency", 0, false);
+
+               /* Pass the key length explicitly: do not rely on NUL termination of the key */
+               ucl_object_insert_key(top, elt, it.first.data(), it.first.size(), true);
+       }
+
+       auto *efunc = ucl_object_emit_file_funcs(fp);
+       auto ret = ucl_object_emit_full(top, UCL_EMIT_JSON_COMPACT, efunc, nullptr);
+       ucl_object_emit_funcs_free(efunc);
+       ucl_object_unref(top);
+
+       /* Buffered output is flushed only here, so a failing fclose() means lost data */
+       if (fclose(fp) != 0) {
+               ret = false;
+       }
+
+       return ret;
+}
+
+
+/**
+ * Looks up a cache item by its numeric id in items_by_id.
+ *
+ * An id outside [0, items_by_id.size()) is treated as an internal error:
+ * it is reported via g_error() and followed by g_abort().
+ *
+ * @param id index into items_by_id
+ * @param resolve_parent when true and the item found is virtual, the result of
+ *                       its get_parent() is returned instead of the item itself
+ * @return raw pointer taken from the stored handle (the container keeps the
+ *         handle), or nullptr if the slot at this id is empty
+ */
+auto symcache::get_item_by_id(int id, bool resolve_parent) const -> const cache_item *
 {
        if (id < 0 || id >= items_by_id.size()) {
+               g_error("internal error: requested item with id %d, when we have just %d items in the cache",
+                               id, (int)items_by_id.size());
                g_abort();
        }
 
        auto &ret = items_by_id[id];
 
        if (!ret) {
-               g_abort();
+               return nullptr;
        }
 
        if (resolve_parent && ret->is_virtual()) {
                return ret->get_parent(*this);
        }
 
-       return ret;
+       return ret.get();
 }
 
 
-auto cache_item::get_parent(const symcache &cache) const -> const cache_item_ptr &
+/**
+ * Returns the parent of this item.
+ *
+ * For a virtual item the call is forwarded to the virtual_item alternative
+ * stored in `specific`; for any other item there is no parent.
+ *
+ * @param cache cache passed through to virtual_item::get_parent()
+ * @return whatever virtual_item::get_parent() yields for a virtual item,
+ *         nullptr for a non-virtual item
+ */
+auto cache_item::get_parent(const symcache &cache) const -> const cache_item *
 {
        if (is_virtual()) {
                const auto &virtual_sp = std::get<virtual_item>(specific);
 
-               return virtual_sp.get_parent()
+               return virtual_sp.get_parent(cache);
        }
 
-       return cache_item_ptr{nullptr};
+       return nullptr;
 }
 
-auto virtual_item::get_parent(const symcache &cache) const -> const cache_item_ptr &
+auto virtual_item::get_parent(const symcache &cache) const -> const cache_item *
 {
        if (parent) {
-               return parent;
+               return parent.get();
        }
 
        return cache.get_item_by_id(parent_id, false);
index a1207fc9725be27b2996dc22d21ebec923acee75..420004064a5587c96c27459614c79319c59ba1f0 100644 (file)
 #include "lua/lua_common.h"
 
+/*
+ * Logging helpers for the symcache code.
+ *
+ * The err/warn/info variants forward to rspamd_default_log_function() and the
+ * debug variants to rspamd_conditional_debug_fast(). They refer to
+ * `static_pool` and `cfg` (or `task` for msg_debug_cache_task) by unqualified
+ * name, so those identifiers must be in scope wherever a macro is expanded.
+ */
 #define msg_err_cache(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
-        cache->static_pool->tag.tagname, cache->cfg->checksum, \
-        G_STRFUNC, \
+        static_pool->tag.tagname, cfg->checksum, \
+        RSPAMD_LOG_FUNC, \
         __VA_ARGS__)
 #define msg_warn_cache(...)   rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
         static_pool->tag.tagname, cfg->checksum, \
-        G_STRFUNC, \
+        RSPAMD_LOG_FUNC, \
         __VA_ARGS__)
 #define msg_info_cache(...)   rspamd_default_log_function (G_LOG_LEVEL_INFO, \
         static_pool->tag.tagname, cfg->checksum, \
-        G_STRFUNC, \
+        RSPAMD_LOG_FUNC, \
         __VA_ARGS__)
 #define msg_debug_cache(...)  rspamd_conditional_debug_fast (NULL, NULL, \
         rspamd_symcache_log_id, "symcache", cfg->checksum, \
-        G_STRFUNC, \
+        RSPAMD_LOG_FUNC, \
         __VA_ARGS__)
 #define msg_debug_cache_task(...)  rspamd_conditional_debug_fast (NULL, NULL, \
         rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \
-        G_STRFUNC, \
+        RSPAMD_LOG_FUNC, \
         __VA_ARGS__)
 
 namespace rspamd::symcache {
@@ -214,7 +214,7 @@ public:
                // TODO
        }
 
-       auto get_parent(const symcache &cache) const -> const cache_item_ptr&;
+       auto get_parent(const symcache &cache) const -> const cache_item *;
 };
 
 struct cache_item {
@@ -253,7 +253,7 @@ struct cache_item {
        std::vector<cache_item_ptr> rdeps;
 
        auto is_virtual() const -> bool { return std::holds_alternative<virtual_item>(specific); }
-       auto get_parent(const symcache &cache) const -> const cache_item_ptr &;
+       auto get_parent(const symcache &cache) const -> const cache_item *;
 };
 
 struct delayed_cache_dependency {
@@ -306,6 +306,7 @@ private:
 private:
        /* Internal methods */
        auto load_items() -> bool;
+       auto save_items() const -> bool;
 
 public:
        explicit symcache(struct rspamd_config *cfg) : cfg(cfg) {
@@ -326,7 +327,7 @@ public:
                }
        }
 
-       auto get_item_by_id(int id, bool resolve_parent) const -> const cache_item_ptr &;
+       auto get_item_by_id(int id, bool resolve_parent) const -> const cache_item *;
 
        /*
         * Initialises the symbols cache, must be called after all symbols are added