From de1cf75eecf2bdc4f02737a5b59b791baf55213a Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sun, 16 Oct 2022 15:25:04 +0100 Subject: [Minor] Move hyperscan tools to the libserver domain --- src/libutil/CMakeLists.txt | 3 +- src/libutil/cxx/hyperscan_tools.cxx | 291 ------------------------------------ 2 files changed, 1 insertion(+), 293 deletions(-) delete mode 100644 src/libutil/cxx/hyperscan_tools.cxx (limited to 'src/libutil') diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt index 8602baf6e..7b3103720 100644 --- a/src/libutil/CMakeLists.txt +++ b/src/libutil/CMakeLists.txt @@ -18,7 +18,6 @@ SET(LIBRSPAMDUTILSRC ${CMAKE_CURRENT_SOURCE_DIR}/heap.c ${CMAKE_CURRENT_SOURCE_DIR}/multipattern.c ${CMAKE_CURRENT_SOURCE_DIR}/cxx/utf8_util.cxx - ${CMAKE_CURRENT_SOURCE_DIR}/cxx/locked_file.cxx - ${CMAKE_CURRENT_SOURCE_DIR}/cxx/hyperscan_tools.cxx) + ${CMAKE_CURRENT_SOURCE_DIR}/cxx/locked_file.cxx) # Rspamdutil SET(RSPAMD_UTIL ${LIBRSPAMDUTILSRC} PARENT_SCOPE) \ No newline at end of file diff --git a/src/libutil/cxx/hyperscan_tools.cxx b/src/libutil/cxx/hyperscan_tools.cxx deleted file mode 100644 index f5086d1e4..000000000 --- a/src/libutil/cxx/hyperscan_tools.cxx +++ /dev/null @@ -1,291 +0,0 @@ -/*- - * Copyright 2022 Vsevolod Stakhov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "config.h" - -#ifdef WITH_HYPERSCAN -#include -#include "contrib/ankerl/unordered_dense.h" -#include "contrib/ankerl/svector.h" -#include "fmt/core.h" -#include "locked_file.hxx" -#include "hs.h" -#include "logger.h" -#include "locked_file.hxx" - -#include /* for glob */ -#include /* for unlink */ -#include -#include "unix-std.h" - -#define msg_info_hyperscan(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \ - "hyperscan", "", \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) - -#define msg_debug_hyperscan(...) rspamd_conditional_debug_fast (NULL, NULL, \ - rspamd_hyperscan_log_id, "hyperscan", "", \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) - -INIT_LOG_MODULE_PUBLIC(hyperscan) - -namespace rspamd::util { - -class hs_known_files_cache { -private: - // These fields are filled when we add new known cache files - ankerl::svector cache_dirs; - ankerl::svector cache_extensions; - ankerl::unordered_dense::set known_cached_files; - bool need_cleanup = false; -private: - hs_known_files_cache() = default; - - virtual ~hs_known_files_cache() { - // Cleanup cache dir - if (need_cleanup) { - auto cleanup_dir = [&](std::string_view dir) -> void { - for (const auto &ext : cache_extensions) { - glob_t globbuf; - - auto glob_pattern = fmt::format("{}{}*.{}", - dir, G_DIR_SEPARATOR_S, ext); - memset(&globbuf, 0, sizeof(globbuf)); - - if (glob(glob_pattern.c_str(), 0, nullptr, &globbuf) == 0) { - for (auto i = 0; i < globbuf.gl_pathc; i++) { - const auto *path = globbuf.gl_pathv[i]; - struct stat st; - - if (stat(path, &st) == -1) { - msg_debug_hyperscan("cannot stat file %s: %s", - path, strerror(errno)); - continue; - } - - if (S_ISREG(st.st_mode)) { - if (!known_cached_files.contains(path)) { - msg_info_hyperscan("remove stale hyperscan file %s", path); - unlink(path); - } - else { - msg_debug_hyperscan("found known hyperscan file %s, size: %Hz", - path, st.st_size); - } - } - } - } - - globfree(&globbuf); - } - }; - - for (const auto &dir: cache_dirs) { - cleanup_dir(dir); - } - } - } -public: - hs_known_files_cache(const hs_known_files_cache &) = delete; - hs_known_files_cache(hs_known_files_cache &&) = delete; - - static auto get(bool need_cleanup) -> hs_known_files_cache& { - static hs_known_files_cache *singleton = nullptr; - - if (singleton == nullptr) { - singleton = new hs_known_files_cache; - singleton->need_cleanup = need_cleanup; - } - - return *singleton; - } - - void add_cached_file(const raii_file &file) { - auto dir = file.get_dir(); - auto ext = file.get_extension(); - - if (std::find_if(cache_dirs.begin(), cache_dirs.end(), - [&](const auto& item){ return item == dir; }) == std::end(cache_dirs)) { - cache_dirs.emplace_back(std::string{dir}); - } - if (std::find_if(cache_extensions.begin(), cache_extensions.end(), - [&](const auto& item){ return item == ext; }) == std::end(cache_extensions)) { - cache_extensions.emplace_back(std::string{ext}); - } - - known_cached_files.insert(file.get_name()); - msg_debug_hyperscan("added new known hyperscan file: %*s", (int)file.get_name().size(), - file.get_name().data()); - } -}; - - -/** - * This is a higher level representation of the cached hyperscan file - */ -struct hs_shared_database { - hs_database_t *db; /**< internal database (might be in a shared memory) */ - std::optional maybe_map; - - ~hs_shared_database() { - if (!maybe_map) { - hs_free_database(db); - } - // Otherwise, handled by maybe_map dtor - } - - explicit hs_shared_database(raii_mmaped_file &&map, hs_database_t *db) : db(db), maybe_map(std::move(map)) {} - explicit hs_shared_database(hs_database_t *db) : db(db), maybe_map(std::nullopt) {} -}; - -static auto -hs_shared_from_unserialized(raii_mmaped_file &&map) -> tl::expected -{ - auto ptr = map.get_map(); - return tl::expected{tl::in_place, std::move(map), (hs_database_t *)ptr}; -} - -static auto -hs_shared_from_serialized(raii_mmaped_file &&map) -> tl::expected -{ - hs_database_t *target = nullptr; - - if (auto ret = hs_deserialize_database((const char *)map.get_map(), map.get_size(), &target); ret != HS_SUCCESS) { - return tl::make_unexpected("cannot deserialize database"); - } - - return tl::expected{tl::in_place, target}; -} - -auto load_cached_hs_file(const char *fname, bool need_cleanup) -> tl::expected -{ - auto &hs_cache = hs_known_files_cache::get(need_cleanup); - - return raii_mmaped_file::mmap_shared(fname, O_RDONLY, PROT_READ) - .and_then([&](T &&cached_serialized) -> tl::expected { -#if defined(HS_MAJOR) && defined(HS_MINOR) && HS_MAJOR >= 5 && HS_MINOR >= 4 - auto unserialized_fname = fmt::format("{}.unser", fname); - auto unserialized_file = raii_locked_file::create(unserialized_fname.c_str(), O_CREAT | O_RDWR | O_EXCL, - 00644) - .and_then([&](auto &&new_file_locked) -> tl::expected { - auto tmpfile_pattern = fmt::format("{}{}hsmp-XXXXXXXXXXXXXXXXXX", - cached_serialized.get_file().get_dir(), G_DIR_SEPARATOR); - auto tmpfile = raii_locked_file::mkstemp(tmpfile_pattern.data(), O_CREAT | O_RDWR | O_EXCL, - 00644); - - if (!tmpfile) { - return tl::make_unexpected(tmpfile.error()); - } - else { - auto &tmpfile_checked = tmpfile.value(); - std::size_t unserialized_size; - - hs_serialized_database_size((const char *)cached_serialized.get_map(), - cached_serialized.get_size(), &unserialized_size); - - msg_debug("multipattern: create new database in %s; %Hz size", - tmpfile_pattern.data(), unserialized_size); - void *buf; - posix_memalign(&buf, 16, unserialized_size); - if (buf == NULL) { - return tl::make_unexpected("Cannot allocate memory"); - } - - // Store owned string - auto tmpfile_name = std::string{tmpfile_checked.get_name()}; - - if (auto ret = hs_deserialize_database_at((const char *)cached_serialized.get_map(), - cached_serialized.get_size(), (hs_database_t *) buf); ret != HS_SUCCESS) { - return tl::make_unexpected( - fmt::format("cannot deserialize hyperscan database: {}", ret)); - } - else { - if (write(tmpfile_checked.get_fd(), buf, unserialized_size) == -1) { - free(buf); - return tl::make_unexpected(fmt::format("cannot write to {}: {}", - tmpfile_name, ::strerror(errno))); - } - else { - free(buf); - - /* - * Unlink target file before renaming to avoid - * race condition. - * So what we have is that `new_file_locked` - * will have flock on that file, so it will be - * replaced after unlink safely, and also unlocked. - */ - (void) unlink(unserialized_fname.c_str()); - if (rename(tmpfile_name.c_str(), - unserialized_fname.c_str()) == -1) { - if (errno != EEXIST) { - msg_err("cannot rename %s -> %s: %s", - tmpfile_name.c_str(), - unserialized_fname.c_str(), - strerror(errno)); - } - } - else { - /* Unlock file but mark it as immortal first to avoid deletion */ - tmpfile_checked.make_immortal(); - (void) tmpfile_checked.unlock(); - } - } - } - /* Reopen in RO mode */ - return raii_file::open(unserialized_fname.c_str(), O_RDONLY); - }; - }) - .or_else([&](auto unused) -> tl::expected { - // Cannot create file, so try to open it in RO mode - return raii_file::open(unserialized_fname.c_str(), O_RDONLY); - }); - - hs_cache.add_cached_file(cached_serialized.get_file()); - - if (unserialized_file.has_value()) { - - auto &unserialized_checked = unserialized_file.value(); - hs_cache.add_cached_file(unserialized_checked); - - if (unserialized_checked.get_size() == 0) { - /* - * This is a case when we have a file that is currently - * being created by another process. - * We cannot use it! - */ - return hs_shared_from_serialized(std::forward(cached_serialized)); - } - else { - return raii_mmaped_file::mmap_shared(std::move(unserialized_checked), PROT_READ) - .and_then([&](U &&mmapped_unserialized) -> auto { - return hs_shared_from_unserialized(std::forward(mmapped_unserialized)); - }); - } - } - else { - return hs_shared_from_serialized(std::forward(cached_serialized)); - } -#else // defined(HS_MAJOR) && defined(HS_MINOR) && HS_MAJOR >= 5 && HS_MINOR >= 4 - hs_cache.add_cached_file(cached_serialized.get_file()); - return hs_shared_from_serialized(std::forward(cached_serialized)); -#endif // defined(HS_MAJOR) && defined(HS_MINOR) && HS_MAJOR >= 5 && HS_MINOR >= 4 - }); -} -} // namespace rspamd::util - - -#endif // WITH_HYPERSCAN \ No newline at end of file -- cgit v1.2.3