From 64205e24d42f4c9a19ed48579141f619aa792c74 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 23 Mar 2020 15:34:04 +0000 Subject: [PATCH] [Rework] Re cache: Load hyperscan early --- src/libserver/cfg_utils.c | 4 ++++ src/libserver/re_cache.c | 39 ++++++++++++++++++++++++++----------- src/libserver/re_cache.h | 16 +++++++++++---- src/libserver/worker_util.c | 6 ++++-- 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c index 8486a029f..23ad382b0 100644 --- a/src/libserver/cfg_utils.c +++ b/src/libserver/cfg_utils.c @@ -888,6 +888,10 @@ rspamd_config_post_load (struct rspamd_config *cfg, /* Init re cache */ rspamd_re_cache_init (cfg->re_cache, cfg); + + /* Try to load Hyperscan */ + rspamd_re_cache_load_hyperscan (cfg->re_cache, + cfg->hs_cache_dir ? cfg->hs_cache_dir : RSPAMD_DBDIR "/"); } if (opts & RSPAMD_CONFIG_INIT_LIBS) { diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index 257428720..29edbe0f7 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -124,7 +124,7 @@ struct rspamd_re_cache { gchar hash[rspamd_cryptobox_HASHBYTES + 1]; lua_State *L; #ifdef WITH_HYPERSCAN - gboolean hyperscan_loaded; + enum rspamd_hyperscan_status hyperscan_loaded; gboolean disable_hyperscan; gboolean vectorized_hyperscan; hs_platform_info_t plt; @@ -241,14 +241,14 @@ rspamd_re_cache_new (void) cache->re = g_ptr_array_new_full (256, rspamd_re_cache_elt_dtor); cache->selectors = kh_init (lua_selectors_hash); #ifdef WITH_HYPERSCAN - cache->hyperscan_loaded = FALSE; + cache->hyperscan_loaded = RSPAMD_HYPERSCAN_UNKNOWN; #endif REF_INIT_RETAIN (cache, rspamd_re_cache_destroy); return cache; } -gboolean +enum rspamd_hyperscan_status rspamd_re_cache_is_hs_loaded (struct rspamd_re_cache *cache) { g_assert (cache != NULL); @@ -256,7 +256,7 @@ rspamd_re_cache_is_hs_loaded (struct rspamd_re_cache *cache) #ifdef WITH_HYPERSCAN return cache->hyperscan_loaded; #else - return FALSE; + 
return RSPAMD_HYPERSCAN_UNSUPPORTED; #endif } @@ -2364,7 +2364,7 @@ rspamd_re_cache_is_valid_hyperscan_file (struct rspamd_re_cache *cache, } -gboolean +enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache, const char *cache_dir) { @@ -2372,7 +2372,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache, g_assert (cache_dir != NULL); #ifndef WITH_HYPERSCAN - return FALSE; + return RSPAMD_HYPERSCAN_UNSUPPORTED; #else gchar path[PATH_MAX]; gint fd, i, n, *hs_ids = NULL, *hs_flags = NULL, total = 0, ret; @@ -2382,7 +2382,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache, struct rspamd_re_class *re_class; struct rspamd_re_cache_elt *elt; struct stat st; - gboolean has_valid = FALSE; + gboolean has_valid = FALSE, all_valid = FALSE; g_hash_table_iter_init (&it, cache->re_classes); @@ -2406,6 +2406,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache, if (map == MAP_FAILED) { msg_err_re_cache ("cannot mmap %s: %s", path, strerror (errno)); close (fd); + all_valid = FALSE; continue; } @@ -2422,6 +2423,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache, msg_err_re_cache ("bad number of expressions in %s: %d", path, n); munmap (map, st.st_size); + all_valid = FALSE; continue; } @@ -2463,6 +2465,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache, re_class->hs_ids = NULL; re_class->hs_scratch = NULL; re_class->hs_db = NULL; + all_valid = FALSE; continue; } @@ -2491,24 +2494,38 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache, re_class->hs_ids = hs_ids; g_free (hs_flags); re_class->nhs = n; - has_valid = TRUE; + + if (!has_valid) { + has_valid = TRUE; + all_valid = TRUE; + } } else { msg_err_re_cache ("invalid hyperscan hash file '%s'", path); + all_valid = FALSE; continue; } } if (has_valid) { - msg_info_re_cache ("hyperscan database of %d regexps has been loaded", total); + if (all_valid) { + msg_info_re_cache ("full hyperscan database of %d regexps 
has been loaded", total); + cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOADED_FULL; + } + else { + msg_info_re_cache ("partial hyperscan database of %d regexps has been loaded", total); + cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOADED_PARTIAL; + } } else { msg_info_re_cache ("hyperscan database has NOT been loaded; no valid expressions"); + cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOAD_ERROR; } - cache->hyperscan_loaded = has_valid; - return has_valid; + + + return cache->hyperscan_loaded; #endif } diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h index 75cee0235..79bcaca16 100644 --- a/src/libserver/re_cache.h +++ b/src/libserver/re_cache.h @@ -90,12 +90,20 @@ void rspamd_re_cache_replace (struct rspamd_re_cache *cache, void rspamd_re_cache_init (struct rspamd_re_cache *cache, struct rspamd_config *cfg); +enum rspamd_hyperscan_status { + RSPAMD_HYPERSCAN_UNKNOWN = 0, + RSPAMD_HYPERSCAN_UNSUPPORTED, + RSPAMD_HYPERSCAN_LOADED_PARTIAL, + RSPAMD_HYPERSCAN_LOADED_FULL, + RSPAMD_HYPERSCAN_LOAD_ERROR, +}; + /** * Returns true when hyperscan is loaded * @param cache * @return */ -gboolean rspamd_re_cache_is_hs_loaded (struct rspamd_re_cache *cache); +enum rspamd_hyperscan_status rspamd_re_cache_is_hs_loaded (struct rspamd_re_cache *cache); /** * Get runtime data for a cache @@ -173,7 +181,6 @@ gint rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache, void (*cb)(guint ncompiled, GError *err, void *cbd), void *cbd); - /** * Returns TRUE if the specified file is valid hyperscan cache */ @@ -183,8 +190,9 @@ gboolean rspamd_re_cache_is_valid_hyperscan_file (struct rspamd_re_cache *cache, /** * Loads all hyperscan regexps precompiled */ -gboolean rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache, - const char *cache_dir); +enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan ( + struct rspamd_re_cache *cache, + const char *cache_dir); /** * Registers lua selector in the cache diff --git a/src/libserver/worker_util.c 
b/src/libserver/worker_util.c index ceb2f1103..5796b8d7f 100644 --- a/src/libserver/worker_util.c +++ b/src/libserver/worker_util.c @@ -1589,10 +1589,12 @@ rspamd_worker_hyperscan_ready (struct rspamd_main *rspamd_main, memset (&rep, 0, sizeof (rep)); rep.type = RSPAMD_CONTROL_HYPERSCAN_LOADED; - if (!rspamd_re_cache_is_hs_loaded (cache) || cmd->cmd.hs_loaded.forced) { + if (rspamd_re_cache_is_hs_loaded (cache) != RSPAMD_HYPERSCAN_LOADED_FULL || + cmd->cmd.hs_loaded.forced) { + msg_info ("loading hyperscan expressions after receiving compilation " "notice: %s", - (!rspamd_re_cache_is_hs_loaded (cache)) ? + (rspamd_re_cache_is_hs_loaded (cache) != RSPAMD_HYPERSCAN_LOADED_FULL) ? "new db" : "forced update"); rep.reply.hs_loaded.status = rspamd_re_cache_load_hyperscan ( worker->srv->cfg->re_cache, cmd->cmd.hs_loaded.cache_dir); -- 2.39.5