From: Vsevolod Stakhov Date: Thu, 14 Apr 2016 18:09:04 +0000 (+0100) Subject: [Feature] Implement caching for hyperscan multipattern X-Git-Tag: 1.2.4~33 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=45aeb65baa5793a1177bb823bd6bdd0b959d25f8;p=rspamd.git [Feature] Implement caching for hyperscan multipattern --- diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index 7441e5bf0..e93ba1d45 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -24,6 +24,7 @@ #include "libserver/worker_util.h" #include "unix-std.h" #include "cryptobox.h" +#include "libutil/multipattern.h" #ifdef HAVE_SYSLOG_H #include <syslog.h> @@ -255,8 +256,15 @@ rspamd_rcl_options_handler (rspamd_mempool_t *pool, const ucl_object_t *obj, } } - return rspamd_rcl_section_parse_defaults (section, cfg->cfg_pool, obj, - cfg, err); + if (rspamd_rcl_section_parse_defaults (section, cfg->cfg_pool, obj, + cfg, err)) { + /* We need to init this early */ + rspamd_multipattern_library_init (cfg->hs_cache_dir); + + return TRUE; + } + + return FALSE; } struct rspamd_rcl_symbol_data { diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c index 279a1b357..fb3783866 100644 --- a/src/libserver/cfg_utils.c +++ b/src/libserver/cfg_utils.c @@ -26,6 +26,7 @@ #include "utlist.h" #include "stat_api.h" #include "unix-std.h" +#include "libutil/multipattern.h" #include #define DEFAULT_SCORE 10.0 @@ -641,6 +642,7 @@ rspamd_config_post_load (struct rspamd_config *cfg, gboolean validate_cache) #endif rspamd_regexp_library_init (); + rspamd_multipattern_library_init (cfg->hs_cache_dir); if ((def_metric = g_hash_table_lookup (cfg->metrics, DEFAULT_METRIC)) == NULL) { @@ -670,7 +672,6 @@ rspamd_config_post_load (struct rspamd_config *cfg, gboolean validate_cache) } init_dynamic_config (cfg); - rspamd_url_init (cfg->tld_file); /* Insert classifiers symbols */ diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c index 838191363..e50574c92 100644 --- 
a/src/libutil/multipattern.c +++ b/src/libutil/multipattern.c @@ -17,7 +17,10 @@ #include "config.h" #include "libutil/multipattern.h" #include "libutil/str_util.h" -#include "logger.h" +#include "libutil/util.h" +#include "libutil/printf.h" +#include "libcryptobox/cryptobox.h" +#include "unix-std.h" #ifdef WITH_HYPERSCAN #include "hs.h" @@ -25,6 +28,8 @@ #include "acism.h" #endif +static const char *hs_cache_dir = NULL; + struct rspamd_multipattern { #ifdef WITH_HYPERSCAN hs_database_t *db; @@ -32,6 +37,7 @@ struct rspamd_multipattern { GArray *hs_pats; GArray *hs_ids; GArray *hs_flags; + rspamd_cryptobox_hash_state_t hash_state; #else ac_trie_t *t; GArray *pats; @@ -47,6 +53,12 @@ rspamd_multipattern_quark (void) return g_quark_from_static_string ("multipattern"); } +void +rspamd_multipattern_library_init (const gchar *cache_dir) +{ + hs_cache_dir = cache_dir; +} + #ifdef WITH_HYPERSCAN static gchar * rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern) @@ -356,6 +368,7 @@ rspamd_multipattern_create (enum rspamd_multipattern_flags flags) mp->hs_pats = g_array_new (FALSE, TRUE, sizeof (gchar *)); mp->hs_flags = g_array_new (FALSE, TRUE, sizeof (gint)); mp->hs_ids = g_array_new (FALSE, TRUE, sizeof (gint)); + rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0); #else mp->pats = g_array_new (FALSE, TRUE, sizeof (ac_trie_pat_t)); #endif @@ -376,6 +389,7 @@ rspamd_multipattern_create_sized (guint npatterns, mp->hs_pats = g_array_sized_new (FALSE, TRUE, sizeof (gchar *), npatterns); mp->hs_flags = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns); mp->hs_ids = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns); + rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0); #else mp->pats = g_array_sized_new (FALSE, TRUE, sizeof (ac_trie_pat_t), npatterns); #endif @@ -407,6 +421,7 @@ rspamd_multipattern_add_pattern (struct rspamd_multipattern *mp, g_array_append_val (mp->hs_pats, np); fl = mp->cnt; g_array_append_val (mp->hs_ids, fl); + 
rspamd_cryptobox_hash_update (&mp->hash_state, np, strlen (np)); #else ac_trie_pat_t pat; @@ -438,6 +453,63 @@ rspamd_multipattern_create_full (const gchar **patterns, return mp; } +#ifdef WITH_HYPERSCAN +static gboolean +rspamd_multipattern_try_load_hs (struct rspamd_multipattern *mp, + const guchar *hash) +{ + gchar fp[PATH_MAX]; + gpointer map; + gsize len; + + if (hs_cache_dir == NULL) { + return FALSE; + } + + rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmp", hs_cache_dir, + (gint)rspamd_cryptobox_HASHBYTES / 2, hash); + + if ((map = rspamd_file_xmap (fp, PROT_READ, &len)) != NULL) { + if (hs_deserialize_database (map, len, &mp->db) == HS_SUCCESS) { + munmap (map, len); + return TRUE; + } + + munmap (map, len); + /* Remove stale file */ + (void)unlink (fp); + } + + return FALSE; +} + +static void +rspamd_multipattern_try_save_hs (struct rspamd_multipattern *mp, + const guchar *hash) +{ + gchar fp[PATH_MAX]; + char *bytes = NULL; + gsize len; + gint fd; + + if (hs_cache_dir == NULL) { + return; + } + + rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmp", hs_cache_dir, + (gint)rspamd_cryptobox_HASHBYTES / 2, hash); + + if ((fd = rspamd_file_xopen (fp, O_WRONLY|O_CREAT|O_EXCL, 00644)) != -1) { + if (hs_serialize_database (mp->db, &bytes, &len) == HS_SUCCESS) { + (void)write (fd, bytes, len); + free (bytes); + } + + close (fd); + } +} +#endif + gboolean rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err) { @@ -447,28 +519,34 @@ rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err) #ifdef WITH_HYPERSCAN hs_platform_info_t plt; hs_compile_error_t *hs_errors; + guchar hash[rspamd_cryptobox_HASHBYTES]; if (mp->cnt > 0) { g_assert (hs_populate_platform (&plt) == HS_SUCCESS); - - if (hs_compile_multi ((const char *const *)mp->hs_pats->data, - (const unsigned int *)mp->hs_flags->data, - (const unsigned int *)mp->hs_ids->data, - mp->cnt, - HS_MODE_BLOCK, - &plt, - &mp->db, - &hs_errors) != HS_SUCCESS) { - - g_set_error (err, 
rspamd_multipattern_quark (), EINVAL, - "cannot create tree of regexp when processing '%s': %s", - g_array_index (mp->hs_pats, char *, hs_errors->expression), - hs_errors->message); - hs_free_compile_error (hs_errors); - - return FALSE; + rspamd_cryptobox_hash_update (&mp->hash_state, (void *)&plt, sizeof (plt)); + rspamd_cryptobox_hash_final (&mp->hash_state, hash); + + if (!rspamd_multipattern_try_load_hs (mp, hash)) { + if (hs_compile_multi ((const char *const *)mp->hs_pats->data, + (const unsigned int *)mp->hs_flags->data, + (const unsigned int *)mp->hs_ids->data, + mp->cnt, + HS_MODE_BLOCK, + &plt, + &mp->db, + &hs_errors) != HS_SUCCESS) { + + g_set_error (err, rspamd_multipattern_quark (), EINVAL, + "cannot create tree of regexp when processing '%s': %s", + g_array_index (mp->hs_pats, char *, hs_errors->expression), + hs_errors->message); + hs_free_compile_error (hs_errors); + + return FALSE; + } } + rspamd_multipattern_try_save_hs (mp, hash); g_assert (hs_alloc_scratch (mp->db, &mp->scratch) == HS_SUCCESS); } #else diff --git a/src/libutil/multipattern.h b/src/libutil/multipattern.h index e956b9182..d8f534b54 100644 --- a/src/libutil/multipattern.h +++ b/src/libutil/multipattern.h @@ -56,6 +56,12 @@ typedef gint (*rspamd_multipattern_cb_t) (struct rspamd_multipattern *mp, gsize len, void *context); +/** + * Init multipattern library and set the appropriate cache dir + * @param cache_dir directory for cached compiled databases (NULL disables caching) + */ +void rspamd_multipattern_library_init (const gchar *cache_dir); + /** * Creates empty multipattern structure * @param flags