aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2016-04-14 19:09:04 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2016-04-14 19:09:04 +0100
commit45aeb65baa5793a1177bb823bd6bdd0b959d25f8 (patch)
treedde35fe6e398a322ba9f3f8aa61342e8015ec9b3
parentee1e8c27b804061f0e981ddc0b575291e4148c31 (diff)
downloadrspamd-45aeb65baa5793a1177bb823bd6bdd0b959d25f8.tar.gz
rspamd-45aeb65baa5793a1177bb823bd6bdd0b959d25f8.zip
[Feature] Implement caching for hyperscan multipattern
-rw-r--r--src/libserver/cfg_rcl.c12
-rw-r--r--src/libserver/cfg_utils.c3
-rw-r--r--src/libutil/multipattern.c114
-rw-r--r--src/libutil/multipattern.h6
4 files changed, 114 insertions, 21 deletions
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index 7441e5bf0..e93ba1d45 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -24,6 +24,7 @@
#include "libserver/worker_util.h"
#include "unix-std.h"
#include "cryptobox.h"
+#include "libutil/multipattern.h"
#ifdef HAVE_SYSLOG_H
#include <syslog.h>
@@ -255,8 +256,15 @@ rspamd_rcl_options_handler (rspamd_mempool_t *pool, const ucl_object_t *obj,
}
}
- return rspamd_rcl_section_parse_defaults (section, cfg->cfg_pool, obj,
- cfg, err);
+ if (rspamd_rcl_section_parse_defaults (section, cfg->cfg_pool, obj,
+ cfg, err)) {
+ /* We need to init this early */
+ rspamd_multipattern_library_init (cfg->hs_cache_dir);
+
+ return TRUE;
+ }
+
+ return FALSE;
}
struct rspamd_rcl_symbol_data {
diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c
index 279a1b357..fb3783866 100644
--- a/src/libserver/cfg_utils.c
+++ b/src/libserver/cfg_utils.c
@@ -26,6 +26,7 @@
#include "utlist.h"
#include "stat_api.h"
#include "unix-std.h"
+#include "libutil/multipattern.h"
#include <math.h>
#define DEFAULT_SCORE 10.0
@@ -641,6 +642,7 @@ rspamd_config_post_load (struct rspamd_config *cfg, gboolean validate_cache)
#endif
rspamd_regexp_library_init ();
+ rspamd_multipattern_library_init (cfg->hs_cache_dir);
if ((def_metric =
g_hash_table_lookup (cfg->metrics, DEFAULT_METRIC)) == NULL) {
@@ -670,7 +672,6 @@ rspamd_config_post_load (struct rspamd_config *cfg, gboolean validate_cache)
}
init_dynamic_config (cfg);
-
rspamd_url_init (cfg->tld_file);
/* Insert classifiers symbols */
diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c
index 838191363..e50574c92 100644
--- a/src/libutil/multipattern.c
+++ b/src/libutil/multipattern.c
@@ -17,7 +17,10 @@
#include "config.h"
#include "libutil/multipattern.h"
#include "libutil/str_util.h"
-#include "logger.h"
+#include "libutil/util.h"
+#include "libutil/printf.h"
+#include "libcryptobox/cryptobox.h"
+#include "unix-std.h"
#ifdef WITH_HYPERSCAN
#include "hs.h"
@@ -25,6 +28,8 @@
#include "acism.h"
#endif
+static const char *hs_cache_dir = NULL;
+
struct rspamd_multipattern {
#ifdef WITH_HYPERSCAN
hs_database_t *db;
@@ -32,6 +37,7 @@ struct rspamd_multipattern {
GArray *hs_pats;
GArray *hs_ids;
GArray *hs_flags;
+ rspamd_cryptobox_hash_state_t hash_state;
#else
ac_trie_t *t;
GArray *pats;
@@ -47,6 +53,12 @@ rspamd_multipattern_quark (void)
return g_quark_from_static_string ("multipattern");
}
+/*
+ * Remember the directory used for cached hyperscan databases.
+ * The pointer is stored as-is in the file-level hs_cache_dir (no copy is
+ * made here), so the string must stay valid for as long as the cache
+ * helpers may be called. A NULL cache_dir leaves caching switched off:
+ * both the load and the save helper return early when hs_cache_dir is NULL.
+ */
+void
+rspamd_multipattern_library_init (const gchar *cache_dir)
+{
+hs_cache_dir = cache_dir;
+}
+
#ifdef WITH_HYPERSCAN
static gchar *
rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern)
@@ -356,6 +368,7 @@ rspamd_multipattern_create (enum rspamd_multipattern_flags flags)
mp->hs_pats = g_array_new (FALSE, TRUE, sizeof (gchar *));
mp->hs_flags = g_array_new (FALSE, TRUE, sizeof (gint));
mp->hs_ids = g_array_new (FALSE, TRUE, sizeof (gint));
+ rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0);
#else
mp->pats = g_array_new (FALSE, TRUE, sizeof (ac_trie_pat_t));
#endif
@@ -376,6 +389,7 @@ rspamd_multipattern_create_sized (guint npatterns,
mp->hs_pats = g_array_sized_new (FALSE, TRUE, sizeof (gchar *), npatterns);
mp->hs_flags = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns);
mp->hs_ids = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns);
+ rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0);
#else
mp->pats = g_array_sized_new (FALSE, TRUE, sizeof (ac_trie_pat_t), npatterns);
#endif
@@ -407,6 +421,7 @@ rspamd_multipattern_add_pattern (struct rspamd_multipattern *mp,
g_array_append_val (mp->hs_pats, np);
fl = mp->cnt;
g_array_append_val (mp->hs_ids, fl);
+ rspamd_cryptobox_hash_update (&mp->hash_state, np, strlen (np));
#else
ac_trie_pat_t pat;
@@ -438,6 +453,63 @@ rspamd_multipattern_create_full (const gchar **patterns,
return mp;
}
+#ifdef WITH_HYPERSCAN
+/*
+ * Try to restore a compiled hyperscan database for `mp` from the cache
+ * directory instead of compiling it.
+ *
+ * The cache file is "<hs_cache_dir>/<name>.hsmp", where <name> is produced
+ * from `hash` by the "%*xs" conversion (presumably a hex dump of the first
+ * HASHBYTES / 2 bytes - confirm against rspamd_snprintf).
+ *
+ * Returns TRUE when the file was mapped and hs_deserialize_database ()
+ * accepted it (mp->db is filled in that case); FALSE when caching is
+ * disabled, the file cannot be mapped, or deserialization fails. A file
+ * that fails to deserialize is unlinked so that it can be regenerated.
+ */
+static gboolean
+rspamd_multipattern_try_load_hs (struct rspamd_multipattern *mp,
+		const guchar *hash)
+{
+	gchar cache_path[PATH_MAX];
+	gpointer mapped;
+	gsize mapped_len;
+	gboolean loaded;
+
+	if (hs_cache_dir == NULL) {
+		return FALSE;
+	}
+
+	rspamd_snprintf (cache_path, sizeof (cache_path), "%s/%*xs.hsmp",
+			hs_cache_dir, (gint)rspamd_cryptobox_HASHBYTES / 2, hash);
+
+	mapped = rspamd_file_xmap (cache_path, PROT_READ, &mapped_len);
+
+	if (mapped == NULL) {
+		/* Nothing cached (or not readable): caller has to compile */
+		return FALSE;
+	}
+
+	loaded = (hs_deserialize_database (mapped, mapped_len,
+			&mp->db) == HS_SUCCESS);
+	/* The mapping is released regardless of the outcome */
+	munmap (mapped, mapped_len);
+
+	if (!loaded) {
+		/* Remove stale file */
+		(void)unlink (cache_path);
+	}
+
+	return loaded;
+}
+
+/*
+ * Best-effort store of the compiled database mp->db into the cache
+ * directory as "<hs_cache_dir>/<name>.hsmp" (same naming as the loader).
+ *
+ * The file is created with O_CREAT|O_EXCL, so an already existing cache
+ * entry is left untouched and the (potentially expensive) serialization is
+ * not even attempted for it. Nothing is reported to the caller: a failure
+ * merely means that the next start has to compile the patterns again.
+ *
+ * Unlike a plain write (), short writes and EINTR are handled, and if the
+ * database cannot be serialized or written completely the freshly created
+ * file is unlinked. Otherwise an empty or truncated file would occupy the
+ * name (blocking further saves because of O_EXCL) until some later load
+ * attempt rejects and removes it.
+ */
+static void
+rspamd_multipattern_try_save_hs (struct rspamd_multipattern *mp,
+		const guchar *hash)
+{
+	gchar fp[PATH_MAX];
+	char *bytes = NULL;
+	const char *pos;
+	gsize len = 0, remain;
+	gssize r;
+	gint fd;
+	gboolean saved = FALSE;
+
+	if (hs_cache_dir == NULL) {
+		return;
+	}
+
+	rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmp", hs_cache_dir,
+			(gint)rspamd_cryptobox_HASHBYTES / 2, hash);
+
+	fd = rspamd_file_xopen (fp, O_WRONLY|O_CREAT|O_EXCL, 00644);
+
+	if (fd == -1) {
+		/* Already cached by someone else, or the directory is not writable */
+		return;
+	}
+
+	if (hs_serialize_database (mp->db, &bytes, &len) == HS_SUCCESS) {
+		pos = bytes;
+		remain = len;
+		saved = TRUE;
+
+		while (remain > 0) {
+			r = write (fd, pos, remain);
+
+			if (r > 0) {
+				pos += r;
+				remain -= r;
+			}
+			else if (r == -1 && errno == EINTR) {
+				/* Interrupted before anything was written: retry */
+				continue;
+			}
+			else {
+				saved = FALSE;
+				break;
+			}
+		}
+
+		free (bytes);
+	}
+
+	if (close (fd) == -1) {
+		saved = FALSE;
+	}
+
+	if (!saved) {
+		/* Do not leave an empty or truncated database behind */
+		(void)unlink (fp);
+	}
+}
+#endif
+
gboolean
rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err)
{
@@ -447,28 +519,34 @@ rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err)
#ifdef WITH_HYPERSCAN
hs_platform_info_t plt;
hs_compile_error_t *hs_errors;
+ guchar hash[rspamd_cryptobox_HASHBYTES];
if (mp->cnt > 0) {
g_assert (hs_populate_platform (&plt) == HS_SUCCESS);
-
- if (hs_compile_multi ((const char *const *)mp->hs_pats->data,
- (const unsigned int *)mp->hs_flags->data,
- (const unsigned int *)mp->hs_ids->data,
- mp->cnt,
- HS_MODE_BLOCK,
- &plt,
- &mp->db,
- &hs_errors) != HS_SUCCESS) {
-
- g_set_error (err, rspamd_multipattern_quark (), EINVAL,
- "cannot create tree of regexp when processing '%s': %s",
- g_array_index (mp->hs_pats, char *, hs_errors->expression),
- hs_errors->message);
- hs_free_compile_error (hs_errors);
-
- return FALSE;
+ rspamd_cryptobox_hash_update (&mp->hash_state, (void *)&plt, sizeof (plt));
+ rspamd_cryptobox_hash_final (&mp->hash_state, hash);
+
+ if (!rspamd_multipattern_try_load_hs (mp, hash)) {
+ if (hs_compile_multi ((const char *const *)mp->hs_pats->data,
+ (const unsigned int *)mp->hs_flags->data,
+ (const unsigned int *)mp->hs_ids->data,
+ mp->cnt,
+ HS_MODE_BLOCK,
+ &plt,
+ &mp->db,
+ &hs_errors) != HS_SUCCESS) {
+
+ g_set_error (err, rspamd_multipattern_quark (), EINVAL,
+ "cannot create tree of regexp when processing '%s': %s",
+ g_array_index (mp->hs_pats, char *, hs_errors->expression),
+ hs_errors->message);
+ hs_free_compile_error (hs_errors);
+
+ return FALSE;
+ }
}
+ rspamd_multipattern_try_save_hs (mp, hash);
g_assert (hs_alloc_scratch (mp->db, &mp->scratch) == HS_SUCCESS);
}
#else
diff --git a/src/libutil/multipattern.h b/src/libutil/multipattern.h
index e956b9182..d8f534b54 100644
--- a/src/libutil/multipattern.h
+++ b/src/libutil/multipattern.h
@@ -57,6 +57,12 @@ typedef gint (*rspamd_multipattern_cb_t) (struct rspamd_multipattern *mp,
void *context);
/**
+ * Init multipattern library and set the appropriate cache dir
+ * @param cache_dir directory where compiled hyperscan databases are cached;
+ * the pointer is stored without copying, NULL disables the cache
+ */
+void rspamd_multipattern_library_init (const gchar *cache_dir);
+
+/**
* Creates empty multipattern structure
* @param flags
* @return