]> source.dussan.org Git - rspamd.git/commitdiff
[Project] Rework cleanup
authorVsevolod Stakhov <vsevolod@rspamd.com>
Sun, 23 Oct 2022 20:37:38 +0000 (21:37 +0100)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Sun, 23 Oct 2022 20:37:38 +0000 (21:37 +0100)
src/libserver/hyperscan_tools.cxx
src/libserver/hyperscan_tools.h
src/libserver/maps/map_helpers.c
src/rspamd.c

index 3383915c39968311e380217b34c01ac2a01d024b..6ec5f7c36b0147c4ed12b0f6d19fa54057147ec0 100644 (file)
@@ -67,49 +67,9 @@ private:
 
        virtual ~hs_known_files_cache() {
                // Cleanup cache dir
-               /* We clean dir merely if we are running from the main process */
-               if (rspamd_current_worker == nullptr) {
-                       auto cleanup_dir = [&](std::string_view dir) -> void {
-                               for (const auto &ext : cache_extensions) {
-                                       glob_t globbuf;
-
-                                       auto glob_pattern = fmt::format("{}{}*.{}",
-                                               dir, G_DIR_SEPARATOR_S, ext);
-                                       memset(&globbuf, 0, sizeof(globbuf));
-
-                                       if (glob(glob_pattern.c_str(), 0, nullptr, &globbuf) == 0) {
-                                               for (auto i = 0; i < globbuf.gl_pathc; i++) {
-                                                       const auto *path = globbuf.gl_pathv[i];
-                                                       struct stat st;
-
-                                                       if (stat(path, &st) == -1) {
-                                                               msg_debug_hyperscan("cannot stat file %s: %s",
-                                                                       path, strerror(errno));
-                                                               continue;
-                                                       }
-
-                                                       if (S_ISREG(st.st_mode)) {
-                                                               if (!known_cached_files.contains(path)) {
-                                                                       msg_info_hyperscan("remove stale hyperscan file %s", path);
-                                                                       unlink(path);
-                                                               }
-                                                               else {
-                                                                       msg_debug_hyperscan("found known hyperscan file %s, size: %Hz",
-                                                                               path, st.st_size);
-                                                               }
-                                                       }
-                                               }
-                                       }
-
-                                       globfree(&globbuf);
-                               }
-                       };
-
-                       for (const auto &dir: cache_dirs) {
-                               cleanup_dir(dir);
-                       }
-               }
+               cleanup_maybe();
        }
+
        /* Have to duplicate raii_file methods to use raw filenames */
        static auto get_dir(std::string_view fname) -> std::string_view {
                auto sep_pos = fname.rfind(G_DIR_SEPARATOR);
@@ -177,8 +137,13 @@ public:
        }
 
        void add_cached_file(const char *fname) {
-               auto dir = hs_known_files_cache::get_dir(fname);
-               auto ext =  hs_known_files_cache::get_extension(fname);
+
+               auto mut_fname = std::string{fname};
+               std::size_t sz;
+               rspamd_http_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz);
+               mut_fname.resize(sz);
+               auto dir = hs_known_files_cache::get_dir(mut_fname);
+               auto ext =  hs_known_files_cache::get_extension(mut_fname);
 
                if (std::find_if(cache_dirs.begin(), cache_dirs.end(),
                        [&](const auto& item){ return item == dir; }) == std::end(cache_dirs)) {
@@ -189,10 +154,60 @@ public:
                        cache_extensions.emplace_back(std::string{ext});
                }
 
-               auto is_known = known_cached_files.insert(fname);
+               auto is_known = known_cached_files.insert(mut_fname);
                msg_debug_hyperscan("added %s known hyperscan file: %s",
                        is_known.second ? "new" : "already",
-                       fname);
+                       mut_fname.c_str());
+       }
+
+       auto cleanup_maybe() -> void {
+               /* We clean dir merely if we are running from the main process */
+               if (rspamd_current_worker == nullptr) {
+                       auto cleanup_dir = [&](std::string_view dir) -> void {
+                               for (const auto &ext : cache_extensions) {
+                                       glob_t globbuf;
+
+                                       auto glob_pattern = fmt::format("{}{}*.{}",
+                                               dir, G_DIR_SEPARATOR_S, ext);
+                                       memset(&globbuf, 0, sizeof(globbuf));
+
+                                       if (glob(glob_pattern.c_str(), 0, nullptr, &globbuf) == 0) {
+                                               for (auto i = 0; i < globbuf.gl_pathc; i++) {
+                                                       const auto *path = globbuf.gl_pathv[i];
+                                                       struct stat st;
+
+                                                       if (stat(path, &st) == -1) {
+                                                               msg_debug_hyperscan("cannot stat file %s: %s",
+                                                                       path, strerror(errno));
+                                                               continue;
+                                                       }
+
+                                                       if (S_ISREG(st.st_mode)) {
+                                                               if (!known_cached_files.contains(path)) {
+                                                                       msg_info_hyperscan("remove stale hyperscan file %s", path);
+                                                                       unlink(path);
+                                                               }
+                                                               else {
+                                                                       msg_debug_hyperscan("found known hyperscan file %s, size: %Hz",
+                                                                               path, st.st_size);
+                                                               }
+                                                       }
+                                               }
+                                       }
+
+                                       globfree(&globbuf);
+                               }
+                       };
+
+                       for (const auto &dir: cache_dirs) {
+                               msg_debug_hyperscan("cleaning up directory %s", dir.c_str());
+                               cleanup_dir(dir);
+                       }
+
+                       cache_dirs.clear();
+                       cache_extensions.clear();
+                       known_cached_files.clear();
+               }
        }
 };
 
@@ -333,7 +348,6 @@ auto load_cached_hs_file(const char *fname) -> tl::expected<hs_shared_database,
                        if (unserialized_file.has_value()) {
 
                                auto &unserialized_checked = unserialized_file.value();
-                               hs_cache.add_cached_file(unserialized_checked);
 
                                if (unserialized_checked.get_size() == 0) {
                                        /*
@@ -344,6 +358,7 @@ auto load_cached_hs_file(const char *fname) -> tl::expected<hs_shared_database,
                                        return hs_shared_from_serialized(std::forward<T>(cached_serialized));
                                }
                                else {
+                                       hs_cache.add_cached_file(unserialized_checked);
                                        return raii_mmaped_file::mmap_shared(std::move(unserialized_checked), PROT_READ)
                                                .and_then([&]<class U>(U &&mmapped_unserialized) -> auto {
                                                        return hs_shared_from_unserialized(std::forward<U>(mmapped_unserialized));
@@ -444,4 +459,10 @@ rspamd_hyperscan_notice_known(const char *fname)
        }
 }
 
+/*
+ * C entry point: runs cleanup_maybe() on the object returned by
+ * hs_known_files_cache::get().
+ */
+void
+rspamd_hyperscan_cleanup_maybe(void)
+{
+       auto &&known_files = rspamd::util::hs_known_files_cache::get();
+
+       known_files.cleanup_maybe();
+}
+
 #endif // WITH_HYPERSCAN
\ No newline at end of file
index 50ca5154393252f37f2e373372250f698d03d6bf..5d50e07ec9ed00e3bd634f24fc45ae8b657a54fe 100644 (file)
@@ -60,6 +60,11 @@ void rspamd_hyperscan_free(rspamd_hyperscan_t *db);
  */
 void rspamd_hyperscan_notice_known(const char *fname);
 
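+/**
+ * Removes stale hyperscan cache files, i.e. the files that have not been noticed
+ * as known. This function should be called on config free; the cleanup is performed
+ * in the main process only.
+ */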
+void rspamd_hyperscan_cleanup_maybe(void);
+
 G_END_DECLS
 
 #endif
index 6381e6d511d5fc641b5549849dc7ad0ac531bca1..8850d052ce765556d75f4e45eeafe363144c2a8f 100644 (file)
@@ -26,6 +26,7 @@
 
 #ifdef WITH_HYPERSCAN
 #include "hs.h"
+#include "hyperscan_tools.h"
 #endif
 #ifndef WITH_PCRE2
 #include <pcre.h>
@@ -82,7 +83,7 @@ struct rspamd_regexp_map_helper {
        khash_t(rspamd_map_hash) *htb;
        enum rspamd_regexp_map_flags map_flags;
 #ifdef WITH_HYPERSCAN
-       hs_database_t *hs_db;
+       rspamd_hyperscan_t *hs_db;
        hs_scratch_t *hs_scratch;
        gchar **patterns;
        gint *flags;
@@ -883,7 +884,7 @@ rspamd_map_helper_destroy_regexp (struct rspamd_regexp_map_helper *re_map)
                hs_free_scratch (re_map->hs_scratch);
        }
        if (re_map->hs_db) {
-               hs_free_database (re_map->hs_db);
+               rspamd_hyperscan_free(re_map->hs_db);
        }
        if (re_map->patterns) {
                for (i = 0; i < re_map->regexps->len; i ++) {
@@ -1055,112 +1056,11 @@ rspamd_radix_dtor (struct map_cb_data *data)
 }
 
 #ifdef WITH_HYPERSCAN
-struct rspamd_re_maps_cache_dtor_cbdata {
-       struct rspamd_config *cfg;
-       GHashTable *valid_re_hashes;
-       gchar *dirname;
-};
-
-static void
-rspamd_re_maps_cache_cleanup_dtor (gpointer ud)
-{
-       struct rspamd_re_maps_cache_dtor_cbdata *cbd =
-                       (struct rspamd_re_maps_cache_dtor_cbdata *)ud;
-       GPtrArray *cache_files;
-       GError *err = NULL;
-       struct rspamd_config *cfg;
-
-       cfg = cbd->cfg;
-
-       if (cfg->cur_worker != NULL) {
-               /* Skip dtor, limit it to main process only */
-               return;
-       }
-
-       cache_files = rspamd_glob_path (cbd->dirname, "*.hsmc", FALSE, &err);
-
-       if (!cache_files) {
-               msg_err_config ("cannot glob files in %s: %e", cbd->dirname, err);
-               g_error_free (err);
-       }
-       else {
-               const gchar *fname;
-               guint i;
-
-               PTR_ARRAY_FOREACH (cache_files, i, fname) {
-                       gchar *basename = g_path_get_basename (fname);
-
-                       if (g_hash_table_lookup (cbd->valid_re_hashes, basename) == NULL) {
-                               gchar *dir;
-
-                               dir = g_path_get_dirname (fname);
-
-                               /* Sanity check to avoid removal of something bad */
-                               if (strcmp (dir, cbd->dirname) != 0) {
-                                       msg_err_config ("bogus file found: %s in %s, skip deleting",
-                                                       fname, dir);
-                               }
-                               else {
-                                       if (unlink (fname) == -1) {
-                                               msg_err_config ("cannot delete obsolete file %s in %s: %s",
-                                                               fname, dir, strerror (errno));
-                                       }
-                                       else {
-                                               msg_info_config ("deleted obsolete file %s in %s",
-                                                               fname, dir);
-                                       }
-                               }
-
-                               g_free (dir);
-                       }
-                       else {
-                               msg_debug_config ("valid re cache file %s", fname);
-                       }
-
-                       g_free (basename);
-               }
-
-               g_ptr_array_free (cache_files, TRUE);
-       }
-
-       g_hash_table_unref (cbd->valid_re_hashes);
-       g_free (cbd->dirname);
-}
-
-static void
-rspamd_re_map_cache_update (const gchar *fname, struct rspamd_config *cfg)
-{
-       GHashTable *valid_re_hashes;
-
-       valid_re_hashes = rspamd_mempool_get_variable (cfg->cfg_pool,
-                       RSPAMD_MEMPOOL_RE_MAPS_CACHE);
-
-       if (!valid_re_hashes) {
-               valid_re_hashes = g_hash_table_new_full (g_str_hash, g_str_equal,
-                               g_free, NULL);
-               rspamd_mempool_set_variable (cfg->cfg_pool,
-                               RSPAMD_MEMPOOL_RE_MAPS_CACHE,
-                               valid_re_hashes, (rspamd_mempool_destruct_t)g_hash_table_unref);
-
-               /* We also add a cleanup dtor for all hashes */
-               static struct rspamd_re_maps_cache_dtor_cbdata cbd;
-
-               cbd.valid_re_hashes = g_hash_table_ref (valid_re_hashes);
-               cbd.cfg = cfg;
-               cbd.dirname = g_path_get_dirname (fname);
-               rspamd_mempool_add_destructor (cfg->cfg_pool,
-                               rspamd_re_maps_cache_cleanup_dtor, &cbd);
-       }
-
-       g_hash_table_insert (valid_re_hashes, g_path_get_basename (fname), "1");
-}
 
 static gboolean
 rspamd_try_load_re_map_cache (struct rspamd_regexp_map_helper *re_map)
 {
        gchar fp[PATH_MAX];
-       gpointer data;
-       gsize len;
        struct rspamd_map *map;
 
        map = re_map->map;
@@ -1173,25 +1073,9 @@ rspamd_try_load_re_map_cache (struct rspamd_regexp_map_helper *re_map)
                        map->cfg->hs_cache_dir,
                        (gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
 
-       if ((data = rspamd_file_xmap (fp, PROT_READ, &len, TRUE)) != NULL) {
-               if (hs_deserialize_database (data, len, &re_map->hs_db) == HS_SUCCESS) {
-                       rspamd_re_map_cache_update (fp, map->cfg);
-                       munmap (data, len);
-
-                       msg_info_map ("loaded hypersan cache from %s (%Hz length) for %s",
-                                       fp, len, map->name);
-
-                       return TRUE;
-               }
-
-               msg_info_map ("invalid hypersan cache in %s (%Hz length) for %s, removing file",
-                               fp, len, map->name);
-               munmap (data, len);
-               /* Remove stale file */
-               (void)unlink (fp);
-       }
+       re_map->hs_db = rspamd_hyperscan_maybe_load(fp);
 
-       return FALSE;
+       return re_map->hs_db != NULL;
 }
 
 static gboolean
@@ -1214,7 +1098,7 @@ rspamd_try_save_re_map_cache (struct rspamd_regexp_map_helper *re_map)
                        (gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
 
        if ((fd = rspamd_file_xopen (fp, O_WRONLY | O_CREAT | O_EXCL, 00644, 0)) != -1) {
-               if (hs_serialize_database (re_map->hs_db, &bytes, &len) == HS_SUCCESS) {
+               if (hs_serialize_database (rspamd_hyperscan_get_database(re_map->hs_db), &bytes, &len) == HS_SUCCESS) {
                        if (write (fd, bytes, len) == -1) {
                                msg_warn_map ("cannot write hyperscan cache to %s: %s",
                                                fp, strerror (errno));
@@ -1237,8 +1121,7 @@ rspamd_try_save_re_map_cache (struct rspamd_regexp_map_helper *re_map)
                                else {
                                        msg_info_map ("written cached hyperscan data for %s to %s (%Hz length)",
                                                        map->name, np, len);
-
-                                       rspamd_re_map_cache_update (np, map->cfg);
+                                       rspamd_hyperscan_notice_known(np);
                                }
                        }
                }
@@ -1255,43 +1138,6 @@ rspamd_try_save_re_map_cache (struct rspamd_regexp_map_helper *re_map)
        return FALSE;
 }
 
-static gboolean
-rspamd_re_map_cache_cleanup_old (struct rspamd_regexp_map_helper *old_re_map)
-{
-       gchar fp[PATH_MAX];
-       struct rspamd_map *map;
-       gboolean ret = TRUE;
-
-       map = old_re_map->map;
-
-       if (!map->cfg->hs_cache_dir) {
-               return FALSE;
-       }
-
-       rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmc",
-                       map->cfg->hs_cache_dir,
-                       (gint)rspamd_cryptobox_HASHBYTES / 2, old_re_map->re_digest);
-
-       msg_info_map ("unlink stale cache file for %s: %s", map->name, fp);
-
-       if (unlink (fp) == -1) {
-               msg_warn_map ("cannot unlink stale cache file for %s (%s): %s",
-                               map->name, fp, strerror (errno));
-               ret = FALSE;
-       }
-
-       GHashTable *valid_re_hashes;
-
-       valid_re_hashes = rspamd_mempool_get_variable (map->cfg->cfg_pool,
-                       RSPAMD_MEMPOOL_RE_MAPS_CACHE);
-
-       if (valid_re_hashes) {
-               g_hash_table_remove (valid_re_hashes, fp);
-       }
-
-       return ret;
-}
-
 #endif
 
 static void
@@ -1376,6 +1222,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
 
                if (!rspamd_try_load_re_map_cache (re_map)) {
                        gdouble ts1 = rspamd_get_ticks (FALSE);
+                       hs_database_t *hs_db = NULL;
 
                        if (hs_compile_multi ((const gchar **) re_map->patterns,
                                        re_map->flags,
@@ -1383,7 +1230,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
                                        re_map->regexps->len,
                                        HS_MODE_BLOCK,
                                        &plt,
-                                       &re_map->hs_db,
+                                       &hs_db,
                                        &err) != HS_SUCCESS) {
 
                                msg_err_map ("cannot create tree of regexp when processing '%s': %s",
@@ -1396,6 +1243,8 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
                                return;
                        }
 
+                       re_map->hs_db = rspamd_hyperscan_from_raw_db(hs_db);
+
                        ts1 = (rspamd_get_ticks (FALSE) - ts1) * 1000.0;
                        msg_info_map ("hyperscan compiled %d regular expressions from %s in %.1f ms",
                                        re_map->regexps->len, re_map->map->name, ts1);
@@ -1406,9 +1255,9 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
                                        re_map->regexps->len, re_map->map->name);
                }
 
-               if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) {
+               if (hs_alloc_scratch (rspamd_hyperscan_get_database(re_map->hs_db), &re_map->hs_scratch) != HS_SUCCESS) {
                        msg_err_map ("cannot allocate scratch space for hyperscan");
-                       hs_free_database (re_map->hs_db);
+                       rspamd_hyperscan_free(re_map->hs_db);
                        re_map->hs_db = NULL;
                }
        }
@@ -1547,15 +1396,6 @@ rspamd_regexp_list_fin (struct map_cb_data *data, void **target)
 
                if (data->prev_data) {
                        old_re_map = data->prev_data;
-
-#ifdef WITH_HYPERSCAN
-                       if (re_map && memcmp(re_map->re_digest, old_re_map->re_digest,
-                                       sizeof(re_map->re_digest)) != 0) {
-                               /* Cleanup old stuff */
-                               rspamd_re_map_cache_cleanup_old(old_re_map);
-                       }
-#endif
-
                        rspamd_map_helper_destroy_regexp(old_re_map);
                }
        }
@@ -1614,8 +1454,9 @@ rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
 
                if (validated) {
 
-                       res = hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
-                                       rspamd_match_hs_single_handler, (void *)&i);
+                       res = hs_scan (rspamd_hyperscan_get_database(map->hs_db), in, len, 0,
+                               map->hs_scratch,
+                               rspamd_match_hs_single_handler, (void *)&i);
 
                        if (res == HS_SCAN_TERMINATED) {
                                res = 1;
@@ -1711,7 +1552,8 @@ rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
                        cbd.ar = ret;
                        cbd.map = map;
 
-                       if (hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
+                       if (hs_scan (rspamd_hyperscan_get_database(map->hs_db), in, len,
+                                       0, map->hs_scratch,
                                        rspamd_match_hs_multiple_handler, &cbd) == HS_SUCCESS) {
                                res = 1;
                        }
index 3779e7f8eecff982e6afc08d27d81034c3b04aac..d8371de55c2368e98ea8f6657c546fa3a8a71c79 100644 (file)
 #include "sqlite3.h"
 #include "contrib/libev/ev.h"
 
+#ifdef WITH_HYPERSCAN
+#include "libserver/hyperscan_tools.h"
+#endif
+
 /* 2 seconds to fork new process in place of dead one */
 #define SOFT_FORK_TIME 2
 
@@ -1643,6 +1647,9 @@ main (gint argc, gchar **argv, gchar **env)
 
        msg_info_main ("terminating...");
 
+#ifdef WITH_HYPERSCAN
+       rspamd_hyperscan_cleanup_maybe();
+#endif
        REF_RELEASE (rspamd_main->cfg);
        rspamd_log_close (rspamd_main->logger);
        g_hash_table_unref (rspamd_main->spairs);