From 79044bfc0263cdb73dddbddb5d8cccdf138af4f9 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 25 Feb 2023 14:53:31 +0000 Subject: [Fix] Further checks for the hs_scratch_alloc Issue: #4409 --- src/libserver/hyperscan_tools.cxx | 47 ++++++++++++++++++++++++++++++++++----- src/libserver/hyperscan_tools.h | 4 ++-- src/libserver/maps/map_helpers.c | 15 ++++++++++--- src/libserver/re_cache.c | 6 ++--- src/libutil/multipattern.c | 36 ++++++++++++++++++++++++++---- 5 files changed, 90 insertions(+), 18 deletions(-) diff --git a/src/libserver/hyperscan_tools.cxx b/src/libserver/hyperscan_tools.cxx index cb2e15c9a..615aa57aa 100644 --- a/src/libserver/hyperscan_tools.cxx +++ b/src/libserver/hyperscan_tools.cxx @@ -183,6 +183,35 @@ public: mut_fname.c_str()); } + void delete_cached_file(const char *fname) { + auto mut_fname = std::string{fname}; + std::size_t sz; + + rspamd_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz); + mut_fname.resize(sz); + + if (mut_fname.empty()) { + msg_err_hyperscan("attempt to remove an empty hyperscan file!"); + return; + } + + if (access(mut_fname.c_str(), R_OK) != -1) { + if (unlink(mut_fname.c_str()) == -1) { + msg_err_hyperscan("cannot remove hyperscan file %s: %s", + mut_fname.c_str(), strerror(errno)); + } + else { + msg_debug_hyperscan("removed hyperscan file %s", mut_fname.c_str()); + } + } + else { + msg_err_hyperscan("attempt to remove non-existent hyperscan file %s: %s", + mut_fname.c_str(), strerror(errno)); + } + + known_cached_files.erase(mut_fname); + } + auto cleanup_maybe() -> void { auto env_cleanup_disable = std::getenv("RSPAMD_NO_CLEANUP"); /* We clean dir merely if we are running from the main process */ @@ -248,6 +277,7 @@ public: struct hs_shared_database { hs_database_t *db = nullptr; /**< internal database (might be in a shared memory) */ std::optional maybe_map; + std::string cached_path; ~hs_shared_database() { if (!maybe_map) { @@ -256,8 +286,10 @@ struct hs_shared_database { // Otherwise, handled by maybe_map dtor } - explicit hs_shared_database(raii_mmaped_file &&map, hs_database_t *db) : db(db), maybe_map(std::move(map)) {} - explicit hs_shared_database(hs_database_t *db) : db(db), maybe_map(std::nullopt) {} + explicit hs_shared_database(raii_mmaped_file &&map, hs_database_t *db) : db(db), maybe_map(std::move(map)) { + cached_path = maybe_map.value().get_file().get_name(); + } + explicit hs_shared_database(hs_database_t *db, const char *fname) : db(db), maybe_map(std::nullopt), cached_path{fname} {} hs_shared_database(const hs_shared_database &other) = delete; hs_shared_database() = default; hs_shared_database(hs_shared_database &&other) noexcept { @@ -287,7 +319,7 @@ hs_shared_from_serialized(raii_mmaped_file &&map, std::int64_t offset) -> tl::ex return tl::make_unexpected(error {"cannot deserialize database", ret}); } - return tl::expected{tl::in_place, target}; + return tl::expected{tl::in_place, target, map.get_file().get_name().data()}; } auto load_cached_hs_file(const char *fname, std::int64_t offset = 0) -> tl::expected @@ -464,18 +496,21 @@ rspamd_hyperscan_get_database(rspamd_hyperscan_t *db) } rspamd_hyperscan_t * -rspamd_hyperscan_from_raw_db(hs_database_t *db) +rspamd_hyperscan_from_raw_db(hs_database_t *db, const char *fname) { - auto *ndb = new rspamd::util::hs_shared_database{db}; + auto *ndb = new rspamd::util::hs_shared_database{db, fname}; return C_DB_FROM_CXX(ndb); } void -rspamd_hyperscan_free(rspamd_hyperscan_t *db) +rspamd_hyperscan_free(rspamd_hyperscan_t *db, bool invalid) { auto *real_db = CXX_DB_FROM_C(db); + if (invalid) { + rspamd::util::hs_known_files_cache::get().delete_cached_file(real_db->cached_path.c_str()); + } delete real_db; } diff --git a/src/libserver/hyperscan_tools.h b/src/libserver/hyperscan_tools.h index e66e2ec91..d1707f490 100644 --- a/src/libserver/hyperscan_tools.h +++ b/src/libserver/hyperscan_tools.h @@ -41,7 +41,7 @@ rspamd_hyperscan_t *rspamd_hyperscan_maybe_load(const char *filename, goffset of * @param filename * @return */ -rspamd_hyperscan_t *rspamd_hyperscan_from_raw_db(hs_database_t *db); +rspamd_hyperscan_t *rspamd_hyperscan_from_raw_db(hs_database_t *db, const char *fname); /** * Get the internal database * @param db @@ -52,7 +52,7 @@ hs_database_t* rspamd_hyperscan_get_database(rspamd_hyperscan_t *db); * Free the database * @param db */ -void rspamd_hyperscan_free(rspamd_hyperscan_t *db); +void rspamd_hyperscan_free(rspamd_hyperscan_t *db, bool invalid); /** * Notice a known hyperscan file (e.g. externally serialized) diff --git a/src/libserver/maps/map_helpers.c b/src/libserver/maps/map_helpers.c index 9e649414a..2fa7743b4 100644 --- a/src/libserver/maps/map_helpers.c +++ b/src/libserver/maps/map_helpers.c @@ -884,7 +884,7 @@ rspamd_map_helper_destroy_regexp (struct rspamd_regexp_map_helper *re_map) hs_free_scratch (re_map->hs_scratch); } if (re_map->hs_db) { - rspamd_hyperscan_free(re_map->hs_db); + rspamd_hyperscan_free(re_map->hs_db, false); } if (re_map->patterns) { for (i = 0; i < re_map->regexps->len; i ++) { @@ -1243,7 +1243,16 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map) return; } - re_map->hs_db = rspamd_hyperscan_from_raw_db(hs_db); + if (re_map->map->cfg->hs_cache_dir) { + char fpath[PATH_MAX]; + rspamd_snprintf(fpath, sizeof(fpath), "%s/%*xs.hsmc", + re_map->map->cfg->hs_cache_dir, + (gint) rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest); + re_map->hs_db = rspamd_hyperscan_from_raw_db(hs_db, fpath); + } + else { + re_map->hs_db = rspamd_hyperscan_from_raw_db(hs_db, NULL); + } ts1 = (rspamd_get_ticks (FALSE) - ts1) * 1000.0; msg_info_map ("hyperscan compiled %d regular expressions from %s in %.1f ms", @@ -1257,7 +1266,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map) if (hs_alloc_scratch (rspamd_hyperscan_get_database(re_map->hs_db), &re_map->hs_scratch) != HS_SUCCESS) { msg_err_map ("cannot allocate scratch space for hyperscan"); - rspamd_hyperscan_free(re_map->hs_db); + rspamd_hyperscan_free(re_map->hs_db, true); re_map->hs_db = NULL; } } diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index 3f108fda5..f4d2496b1 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -196,7 +196,7 @@ rspamd_re_cache_destroy (struct rspamd_re_cache *cache) #ifdef WITH_HYPERSCAN if (re_class->hs_db) { - rspamd_hyperscan_free(re_class->hs_db); + rspamd_hyperscan_free(re_class->hs_db, false); } if (re_class->hs_scratch) { hs_free_scratch (re_class->hs_scratch); @@ -2561,7 +2561,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache, } if (re_class->hs_db != NULL) { - rspamd_hyperscan_free (re_class->hs_db); + rspamd_hyperscan_free (re_class->hs_db, false); } if (re_class->hs_ids) { @@ -2594,7 +2594,6 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache, if ((ret = hs_alloc_scratch (rspamd_hyperscan_get_database(re_class->hs_db), &re_class->hs_scratch)) != HS_SUCCESS) { - rspamd_hyperscan_free (re_class->hs_db); if (!try_load) { msg_err_re_cache ("bad hs database in %s; error code: %d", path, ret); } @@ -2604,6 +2603,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache, g_free (hs_ids); g_free (hs_flags); + rspamd_hyperscan_free (re_class->hs_db, true); re_class->hs_ids = NULL; re_class->hs_scratch = NULL; re_class->hs_db = NULL; diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c index 489ac1beb..0d6e8b4f4 100644 --- a/src/libutil/multipattern.c +++ b/src/libutil/multipattern.c @@ -504,16 +504,44 @@ rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err) return FALSE; } - mp->hs_db = rspamd_hyperscan_from_raw_db(db); + + if (hs_cache_dir != NULL) { + char fpath[PATH_MAX]; + rspamd_snprintf (fpath, sizeof (fpath), "%s/%*xs.hsmp", hs_cache_dir, + (gint)rspamd_cryptobox_HASHBYTES / 2, hash); + mp->hs_db = rspamd_hyperscan_from_raw_db(db, fpath); + } + else { + /* Should not happen in the real life */ + mp->hs_db = rspamd_hyperscan_from_raw_db(db, NULL); + } } rspamd_multipattern_try_save_hs (mp, hash); + for (i = 0; i < MAX_SCRATCH; i ++) { + mp->scratch[i] = NULL; + } + for (i = 0; i < MAX_SCRATCH; i ++) { int ret; + if ((ret = hs_alloc_scratch (rspamd_hyperscan_get_database(mp->hs_db), &mp->scratch[i])) != HS_SUCCESS) { - msg_err("fatal error: cannot allocate scratch space for hyperscan: %d", ret); - g_abort(); + msg_err("cannot allocate scratch space for hyperscan: error code %d", ret); + + /* Clean all scratches that are non-NULL */ + for (int ii = 0; ii < MAX_SCRATCH; ii ++) { + if (mp->scratch[ii] != NULL) { + hs_free_scratch(mp->scratch[ii]); + } + } + g_set_error (err, rspamd_multipattern_quark (), EINVAL, + "cannot allocate scratch space for hyperscan: error code %d", ret); + + rspamd_hyperscan_free(mp->hs_db, true); + mp->hs_db = NULL; + + return FALSE; } } } @@ -728,7 +756,7 @@ rspamd_multipattern_destroy (struct rspamd_multipattern *mp) } if (mp->hs_db) { - rspamd_hyperscan_free(mp->hs_db); + rspamd_hyperscan_free(mp->hs_db, false); } } -- cgit v1.2.3