From 90b0edae421d31c12cbc8c29fa294f7732bb4f21 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 8 Oct 2022 15:38:49 +0100 Subject: [PATCH] [Rework] Start movement of the hyperscan related routines into a single unit --- src/libutil/CMakeLists.txt | 3 +- src/libutil/cxx/hyperscan_tools.cxx | 97 +++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 src/libutil/cxx/hyperscan_tools.cxx diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt index 7b3103720..8602baf6e 100644 --- a/src/libutil/CMakeLists.txt +++ b/src/libutil/CMakeLists.txt @@ -18,6 +18,7 @@ SET(LIBRSPAMDUTILSRC ${CMAKE_CURRENT_SOURCE_DIR}/heap.c ${CMAKE_CURRENT_SOURCE_DIR}/multipattern.c ${CMAKE_CURRENT_SOURCE_DIR}/cxx/utf8_util.cxx - ${CMAKE_CURRENT_SOURCE_DIR}/cxx/locked_file.cxx) + ${CMAKE_CURRENT_SOURCE_DIR}/cxx/locked_file.cxx + ${CMAKE_CURRENT_SOURCE_DIR}/cxx/hyperscan_tools.cxx) # Rspamdutil SET(RSPAMD_UTIL ${LIBRSPAMDUTILSRC} PARENT_SCOPE) \ No newline at end of file diff --git a/src/libutil/cxx/hyperscan_tools.cxx b/src/libutil/cxx/hyperscan_tools.cxx new file mode 100644 index 000000000..82664d810 --- /dev/null +++ b/src/libutil/cxx/hyperscan_tools.cxx @@ -0,0 +1,97 @@ +/*- + * Copyright 2022 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "config.h" + +#ifdef WITH_HYPERSCAN +#include +#include "contrib/ankerl/unordered_dense.h" +#include "contrib/ankerl/svector.h" +#include "fmt/core.h" + +#include /* for glob */ +#include /* for stat */ +#include /* for unlink */ + +namespace rspamd::util { +/* Singleton that tracks cache directories, cache file extensions and the set of known cache files (hyperscan-related, judging by the WITH_HYPERSCAN guard). When need_cleanup is set, the destructor unlinks regular files in those directories that match one of the extensions but are not in the known set. */ +class hs_known_files_cache { +private: + // These fields are filled when we add new known cache files + ankerl::svector cache_dirs; + ankerl::svector cache_extensions; + ankerl::unordered_dense::set known_cached_files; + bool need_cleanup = false; +private: + hs_known_files_cache() = default; + /* Private destructor; it does nothing unless need_cleanup is true. NOTE(review): nothing visible in this unit deletes the instance allocated in get(), so confirm that this cleanup is actually reached. */ + virtual ~hs_known_files_cache() { + // Cleanup cache dir + if (need_cleanup) { + auto cleanup_dir = [&](std::string_view dir) -> void { + for (const auto &ext : cache_extensions) { + glob_t globbuf; + /* The pattern is dir, then G_DIR_SEPARATOR_S, then *. followed by ext. */ + auto glob_pattern = fmt::format("{}{}*.{}", + dir, G_DIR_SEPARATOR_S, ext); + memset(&globbuf, 0, sizeof(globbuf)); + /* Matches are walked only when glob() returns 0; globfree() below is called in either case. */ + if (glob(glob_pattern.c_str(), 0, nullptr, &globbuf) == 0) { + for (auto i = 0; i < globbuf.gl_pathc; i++) { /* NOTE(review): i deduces to int while POSIX declares gl_pathc as size_t, so this comparison mixes signed and unsigned. */ + const auto *path = globbuf.gl_pathv[i]; + struct stat st; + /* Skip any match for which stat() fails. */ + if (stat(path, &st) == -1) { + continue; + } + /* Unlink only regular files whose path is absent from known_cached_files. */ + if (S_ISREG(st.st_mode)) { + if (!known_cached_files.contains(path)) { + unlink(path); + } + } + } + } + + globfree(&globbuf); + } + }; + + for (const auto &dir: cache_dirs) { + cleanup_dir(dir); + } + } + } +public: + hs_known_files_cache(const hs_known_files_cache &) = delete; + hs_known_files_cache(hs_known_files_cache &&) = delete; + /* Returns the single shared instance, allocating it with new on the first call. need_cleanup is stored only by the call that creates the instance; later calls ignore their argument. NOTE(review): the null check and allocation have no visible synchronization - confirm callers are single-threaded. */ + static auto get(bool need_cleanup) -> hs_known_files_cache& { + static hs_known_files_cache *singleton = nullptr; + + if (singleton == nullptr) { + singleton = new hs_known_files_cache; + singleton->need_cleanup = need_cleanup; + } + + return *singleton; + } +}; + + +} // namespace rspamd::util + + +#endif \ No newline at end of file -- 2.39.5