diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-12-07 17:24:55 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-12-07 17:24:55 +0000 |
commit | 347c5cd168e5350efe9c25eae01f98a66d90245b (patch) | |
tree | caf2a394b8dee0db1e294e18be15fa76072bb0b8 /src | |
parent | 92ab6bdb44e6c4fe23a782f0a0f7c98a97aaf6e2 (diff) | |
download | rspamd-347c5cd168e5350efe9c25eae01f98a66d90245b.tar.gz rspamd-347c5cd168e5350efe9c25eae01f98a66d90245b.zip |
Add hyperscan compilation support
Diffstat (limited to 'src')
-rw-r--r-- | src/libserver/re_cache.c | 163 | ||||
-rw-r--r-- | src/libserver/re_cache.h | 7 |
2 files changed, 170 insertions, 0 deletions
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 8a6f37119..b6ee1a2c4 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -32,6 +32,7 @@
 #include "libutil/util.h"
 #ifdef WITH_HYPERSCAN
 #include "hs.h"
+#include "unix-std.h"
 #endif
 
 struct rspamd_re_class {
@@ -63,6 +64,12 @@ struct rspamd_re_runtime {
 	struct rspamd_re_cache *cache;
 };
 
+static GQuark
+rspamd_re_cache_quark (void)
+{
+	return g_quark_from_static_string ("re_cache");
+}
+
 static guint64
 rspamd_re_cache_class_id (enum rspamd_re_type type,
 		gpointer type_data,
@@ -247,6 +254,8 @@ rspamd_re_cache_init (struct rspamd_re_cache *cache)
 		features = rspamd_fstring_append (features, "AVX2", 4);
 	}
 
+	hs_set_allocator (g_malloc, g_free);
+
 	msg_info ("loaded hyperscan engine witch cpu tune '%s' and features '%V'",
 			platform, features);
 
@@ -621,3 +630,174 @@ rspamd_re_cache_type_from_string (const char *str)
 
 	return ret;
 }
+
+/**
+ * Compile the regular expressions of each class in `cache` with hyperscan
+ * and write every serialized class database to `<cache_dir>/<hash>.hs`.
+ *
+ * Each expression is test-compiled alone first, plain and then with
+ * HS_FLAG_PREFILTER; expressions failing both attempts are logged and
+ * skipped. A file created for a class is removed again if that class fails.
+ *
+ * @param cache regexp cache, must not be NULL
+ * @param cache_dir directory for the output files, must not be NULL
+ * @param err set to the failure reason when FALSE is returned
+ * @return TRUE on success; FALSE on the first failure or when rspamd is
+ * built without hyperscan
+ */
+gboolean
+rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
+		const char *cache_dir,
+		GError **err)
+{
+	g_assert (cache != NULL);
+	g_assert (cache_dir != NULL);
+
+#ifndef WITH_HYPERSCAN
+	g_set_error (err, rspamd_re_cache_quark (), EINVAL, "hyperscan is disabled");
+	return FALSE;
+#else
+	GHashTableIter it, cit;
+	gpointer k, v;
+	struct rspamd_re_class *re_class;
+	gchar path[PATH_MAX];
+	hs_database_t *test_db;
+	gint fd, i, n, *hs_ids;
+	rspamd_regexp_t *re;
+	hs_compile_error_t *hs_errors;
+	guint flags, *hs_flags;
+	const gchar *pat, **hs_pats;
+	gchar *hs_serialized;
+	gsize serialized_len;
+	gboolean ok;
+
+	g_hash_table_iter_init (&it, cache->re_classes);
+
+	while (g_hash_table_iter_next (&it, &k, &v)) {
+		re_class = v;
+		rspamd_snprintf (path, sizeof (path), "%s%c%s.hs", cache_dir,
+				G_DIR_SEPARATOR, re_class->hash);
+		/*
+		 * NOTE(review): O_EXCL makes open fail when the file exists, so
+		 * O_TRUNC never applies; confirm that refusing to overwrite an
+		 * existing database is intended.
+		 */
+		fd = open (path, O_CREAT|O_TRUNC|O_EXCL|O_WRONLY, 00600);
+
+		if (fd == -1) {
+			g_set_error (err, rspamd_re_cache_quark (), errno, "cannot open file "
+					"%s: %s", path, strerror (errno));
+			return FALSE;
+		}
+
+		g_hash_table_iter_init (&cit, re_class->re);
+		n = g_hash_table_size (re_class->re);
+		hs_flags = g_malloc0 (sizeof (*hs_flags) * n);
+		hs_ids = g_malloc (sizeof (*hs_ids) * n);
+		hs_pats = g_malloc (sizeof (*hs_pats) * n);
+		i = 0;
+
+		while (g_hash_table_iter_next (&cit, &k, &v)) {
+			re = v;
+			pat = rspamd_regexp_get_pattern (re);
+			flags = HS_FLAG_ALLOWEMPTY;
+
+			if (hs_compile (pat,
+					flags,
+					HS_MODE_BLOCK,
+					&cache->plt,
+					&test_db,
+					&hs_errors) != HS_SUCCESS) {
+				msg_info ("cannot compile %s to hyperscan, try prefilter match",
+						pat);
+				hs_free_compile_error (hs_errors);
+				flags |= HS_FLAG_PREFILTER;
+
+				if (hs_compile (pat,
+						flags,
+						HS_MODE_BLOCK,
+						&cache->plt,
+						&test_db,
+						&hs_errors) != HS_SUCCESS) {
+					msg_info (
+							"cannot compile %s to hyperscan even using prefilter",
+							pat);
+					hs_free_compile_error (hs_errors);
+					continue;
+				}
+			}
+
+			/* The probe database only proves that the pattern compiles */
+			hs_free_database (test_db);
+			hs_flags[i] = flags;
+			hs_ids[i] = rspamd_regexp_get_cache_id (re);
+			hs_pats[i] = pat;
+			i ++;
+		}
+		/* Adjust real re number */
+		n = i;
+		ok = TRUE;
+
+		if (n > 0) {
+			/* Create the hs tree */
+			if (hs_compile_multi (hs_pats,
+					hs_flags,
+					hs_ids,
+					n,
+					HS_MODE_BLOCK,
+					&cache->plt,
+					&test_db,
+					&hs_errors) != HS_SUCCESS) {
+				/* expression is negative when no single pattern is at fault */
+				pat = (hs_errors->expression >= 0 && hs_errors->expression < n) ?
+						hs_pats[hs_errors->expression] : "unknown";
+				g_set_error (err, rspamd_re_cache_quark (), EINVAL,
+						"cannot create tree of regexp when processing '%s': %s",
+						pat, hs_errors->message);
+				hs_free_compile_error (hs_errors);
+				ok = FALSE;
+			}
+			else if (hs_serialize_database (test_db, &hs_serialized,
+					&serialized_len) != HS_SUCCESS) {
+				/* hyperscan does not report errors through errno */
+				g_set_error (err,
+						rspamd_re_cache_quark (),
+						EINVAL,
+						"cannot serialize tree of regexp for %s",
+						re_class->hash);
+				hs_free_database (test_db);
+				ok = FALSE;
+			}
+			else {
+				hs_free_database (test_db);
+
+				if (write (fd, hs_serialized, serialized_len) !=
+						(gssize)serialized_len) {
+					g_set_error (err,
+							rspamd_re_cache_quark (),
+							errno,
+							"cannot serialize tree of regexp to %s: %s",
+							path, strerror (errno));
+					ok = FALSE;
+				}
+
+				g_free (hs_serialized);
+			}
+		}
+
+		/* Released on every path, including classes with no usable regexps */
+		g_free (hs_flags);
+		g_free (hs_ids);
+		g_free (hs_pats);
+		close (fd);
+
+		if (!ok) {
+			/* Do not leave a truncated database behind */
+			unlink (path);
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+#endif
+}
diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h
index 65a8bd392..5093689a6 100644
--- a/src/libserver/re_cache.h
+++ b/src/libserver/re_cache.h
@@ -126,4 +126,15 @@ const gchar * rspamd_re_cache_type_to_string (enum rspamd_re_type type);
  */
 enum rspamd_re_type rspamd_re_cache_type_from_string (const char *str);
 
+/**
+ * Compile expressions to the hyperscan tree and store in the `cache_dir`
+ * @param cache regexp cache to compile
+ * @param cache_dir directory that receives one `<hash>.hs` file per class
+ * @param err error description, set when FALSE is returned
+ * @return TRUE on success, FALSE on failure or if hyperscan is disabled
+ */
+gboolean rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
+		const char *cache_dir,
+		GError **err);
+
 #endif