diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-04-14 19:09:04 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-04-14 19:09:04 +0100 |
commit | 45aeb65baa5793a1177bb823bd6bdd0b959d25f8 (patch) | |
tree | dde35fe6e398a322ba9f3f8aa61342e8015ec9b3 /src/libutil | |
parent | ee1e8c27b804061f0e981ddc0b575291e4148c31 (diff) | |
download | rspamd-45aeb65baa5793a1177bb823bd6bdd0b959d25f8.tar.gz rspamd-45aeb65baa5793a1177bb823bd6bdd0b959d25f8.zip |
[Feature] Implement caching for hyperscan multipattern
Diffstat (limited to 'src/libutil')
-rw-r--r-- | src/libutil/multipattern.c | 114 |
-rw-r--r-- | src/libutil/multipattern.h | 6 |
2 files changed, 102 insertions, 18 deletions
diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c index 838191363..e50574c92 100644 --- a/src/libutil/multipattern.c +++ b/src/libutil/multipattern.c @@ -17,7 +17,10 @@ #include "config.h" #include "libutil/multipattern.h" #include "libutil/str_util.h" -#include "logger.h" +#include "libutil/util.h" +#include "libutil/printf.h" +#include "libcryptobox/cryptobox.h" +#include "unix-std.h" #ifdef WITH_HYPERSCAN #include "hs.h" @@ -25,6 +28,8 @@ #include "acism.h" #endif +static const char *hs_cache_dir = NULL; + struct rspamd_multipattern { #ifdef WITH_HYPERSCAN hs_database_t *db; @@ -32,6 +37,7 @@ struct rspamd_multipattern { GArray *hs_pats; GArray *hs_ids; GArray *hs_flags; + rspamd_cryptobox_hash_state_t hash_state; #else ac_trie_t *t; GArray *pats; @@ -47,6 +53,12 @@ rspamd_multipattern_quark (void) return g_quark_from_static_string ("multipattern"); } +void +rspamd_multipattern_library_init (const gchar *cache_dir) +{ + hs_cache_dir = cache_dir; +} + #ifdef WITH_HYPERSCAN static gchar * rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern) @@ -356,6 +368,7 @@ rspamd_multipattern_create (enum rspamd_multipattern_flags flags) mp->hs_pats = g_array_new (FALSE, TRUE, sizeof (gchar *)); mp->hs_flags = g_array_new (FALSE, TRUE, sizeof (gint)); mp->hs_ids = g_array_new (FALSE, TRUE, sizeof (gint)); + rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0); #else mp->pats = g_array_new (FALSE, TRUE, sizeof (ac_trie_pat_t)); #endif @@ -376,6 +389,7 @@ rspamd_multipattern_create_sized (guint npatterns, mp->hs_pats = g_array_sized_new (FALSE, TRUE, sizeof (gchar *), npatterns); mp->hs_flags = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns); mp->hs_ids = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns); + rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0); #else mp->pats = g_array_sized_new (FALSE, TRUE, sizeof (ac_trie_pat_t), npatterns); #endif @@ -407,6 +421,7 @@ rspamd_multipattern_add_pattern (struct rspamd_multipattern 
*mp, g_array_append_val (mp->hs_pats, np); fl = mp->cnt; g_array_append_val (mp->hs_ids, fl); + rspamd_cryptobox_hash_update (&mp->hash_state, np, strlen (np)); #else ac_trie_pat_t pat; @@ -438,6 +453,63 @@ rspamd_multipattern_create_full (const gchar **patterns, return mp; } +#ifdef WITH_HYPERSCAN +static gboolean +rspamd_multipattern_try_load_hs (struct rspamd_multipattern *mp, + const guchar *hash) +{ + gchar fp[PATH_MAX]; + gpointer map; + gsize len; + + if (hs_cache_dir == NULL) { + return FALSE; + } + + rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmp", hs_cache_dir, + (gint)rspamd_cryptobox_HASHBYTES / 2, hash); + + if ((map = rspamd_file_xmap (fp, PROT_READ, &len)) != NULL) { + if (hs_deserialize_database (map, len, &mp->db) == HS_SUCCESS) { + munmap (map, len); + return TRUE; + } + + munmap (map, len); + /* Remove stale file */ + (void)unlink (fp); + } + + return FALSE; +} + +static void +rspamd_multipattern_try_save_hs (struct rspamd_multipattern *mp, + const guchar *hash) +{ + gchar fp[PATH_MAX]; + char *bytes = NULL; + gsize len; + gint fd; + + if (hs_cache_dir == NULL) { + return; + } + + rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmp", hs_cache_dir, + (gint)rspamd_cryptobox_HASHBYTES / 2, hash); + + if ((fd = rspamd_file_xopen (fp, O_WRONLY|O_CREAT|O_EXCL, 00644)) != -1) { + if (hs_serialize_database (mp->db, &bytes, &len) == HS_SUCCESS) { + (void)write (fd, bytes, len); + free (bytes); + } + + close (fd); + } +} +#endif + gboolean rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err) { @@ -447,28 +519,34 @@ rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err) #ifdef WITH_HYPERSCAN hs_platform_info_t plt; hs_compile_error_t *hs_errors; + guchar hash[rspamd_cryptobox_HASHBYTES]; if (mp->cnt > 0) { g_assert (hs_populate_platform (&plt) == HS_SUCCESS); - - if (hs_compile_multi ((const char *const *)mp->hs_pats->data, - (const unsigned int *)mp->hs_flags->data, - (const unsigned int *)mp->hs_ids->data, - mp->cnt, 
- HS_MODE_BLOCK, - &plt, - &mp->db, - &hs_errors) != HS_SUCCESS) { - - g_set_error (err, rspamd_multipattern_quark (), EINVAL, - "cannot create tree of regexp when processing '%s': %s", - g_array_index (mp->hs_pats, char *, hs_errors->expression), - hs_errors->message); - hs_free_compile_error (hs_errors); - - return FALSE; + rspamd_cryptobox_hash_update (&mp->hash_state, (void *)&plt, sizeof (plt)); + rspamd_cryptobox_hash_final (&mp->hash_state, hash); + + if (!rspamd_multipattern_try_load_hs (mp, hash)) { + if (hs_compile_multi ((const char *const *)mp->hs_pats->data, + (const unsigned int *)mp->hs_flags->data, + (const unsigned int *)mp->hs_ids->data, + mp->cnt, + HS_MODE_BLOCK, + &plt, + &mp->db, + &hs_errors) != HS_SUCCESS) { + + g_set_error (err, rspamd_multipattern_quark (), EINVAL, + "cannot create tree of regexp when processing '%s': %s", + g_array_index (mp->hs_pats, char *, hs_errors->expression), + hs_errors->message); + hs_free_compile_error (hs_errors); + + return FALSE; + } } + rspamd_multipattern_try_save_hs (mp, hash); g_assert (hs_alloc_scratch (mp->db, &mp->scratch) == HS_SUCCESS); } #else diff --git a/src/libutil/multipattern.h b/src/libutil/multipattern.h index e956b9182..d8f534b54 100644 --- a/src/libutil/multipattern.h +++ b/src/libutil/multipattern.h @@ -57,6 +57,12 @@ typedef gint (*rspamd_multipattern_cb_t) (struct rspamd_multipattern *mp, void *context); /** + * Init multipart library and set the appropriate cache dir + * @param cache_dir + */ +void rspamd_multipattern_library_init (const gchar *cache_dir); + +/** * Creates empty multipattern structure * @param flags * @return |