aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-12-07 17:24:55 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-12-07 17:24:55 +0000
commit347c5cd168e5350efe9c25eae01f98a66d90245b (patch)
treecaf2a394b8dee0db1e294e18be15fa76072bb0b8 /src
parent92ab6bdb44e6c4fe23a782f0a0f7c98a97aaf6e2 (diff)
downloadrspamd-347c5cd168e5350efe9c25eae01f98a66d90245b.tar.gz
rspamd-347c5cd168e5350efe9c25eae01f98a66d90245b.zip
Add hyperscan compilation support
Diffstat (limited to 'src')
-rw-r--r--src/libserver/re_cache.c163
-rw-r--r--src/libserver/re_cache.h7
2 files changed, 170 insertions, 0 deletions
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 8a6f37119..b6ee1a2c4 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -32,6 +32,7 @@
#include "libutil/util.h"
#ifdef WITH_HYPERSCAN
#include "hs.h"
+#include "unix-std.h"
#endif
struct rspamd_re_class {
@@ -63,6 +64,12 @@ struct rspamd_re_runtime {
struct rspamd_re_cache *cache;
};
+/**
+ * Error domain attached to every GError reported by the regexp cache.
+ */
+static GQuark
+rspamd_re_cache_quark (void)
+{
+	const GQuark domain = g_quark_from_static_string ("re_cache");
+
+	return domain;
+}
+
static guint64
rspamd_re_cache_class_id (enum rspamd_re_type type,
gpointer type_data,
@@ -247,6 +254,8 @@ rspamd_re_cache_init (struct rspamd_re_cache *cache)
features = rspamd_fstring_append (features, "AVX2", 4);
}
+ hs_set_allocator (g_malloc, g_free);
+
msg_info ("loaded hyperscan engine witch cpu tune '%s' and features '%V'",
platform, features);
@@ -621,3 +630,157 @@ rspamd_re_cache_type_from_string (const char *str)
return ret;
}
+
+/**
+ * Compile every regexp class of the cache into a hyperscan database and
+ * store it serialized in `cache_dir` as `<class hash>.hs`.
+ *
+ * Each pattern is first test-compiled on its own (plain, then with
+ * HS_FLAG_PREFILTER); patterns that cannot be compiled either way are
+ * skipped. The surviving patterns of a class are compiled together and the
+ * resulting database is written to the file. A class without any usable
+ * pattern leaves an empty file behind.
+ *
+ * @param cache regexp cache, must not be NULL
+ * @param cache_dir directory for the serialized databases, must not be NULL
+ * @param err filled with an error in the re_cache domain on failure
+ * @return TRUE if all classes were processed, FALSE on the first failure
+ *         (or always when built without hyperscan)
+ */
+gboolean
+rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
+		const char *cache_dir,
+		GError **err)
+{
+	g_assert (cache != NULL);
+	g_assert (cache_dir != NULL);
+
+#ifndef WITH_HYPERSCAN
+	g_set_error (err, rspamd_re_cache_quark (), EINVAL, "hyperscan is disabled");
+	return FALSE;
+#else
+	GHashTableIter it, cit;
+	gpointer k, v;
+	struct rspamd_re_class *re_class;
+	gchar path[PATH_MAX];
+	hs_database_t *test_db;
+	gint fd, i, n, saved_errno, *hs_ids;
+	rspamd_regexp_t *re;
+	hs_compile_error_t *hs_errors;
+	guint *hs_flags;
+	const gchar **hs_pats, *pat;
+	gchar *hs_serialized;
+	gsize serialized_len, written;
+	gssize r;
+	gboolean ok;
+
+	g_hash_table_iter_init (&it, cache->re_classes);
+
+	while (g_hash_table_iter_next (&it, &k, &v)) {
+		re_class = v;
+		rspamd_snprintf (path, sizeof (path), "%s%c%s.hs", cache_dir,
+				G_DIR_SEPARATOR, re_class->hash);
+		fd = open (path, O_CREAT|O_TRUNC|O_EXCL|O_WRONLY, 00600);
+
+		if (fd == -1) {
+			/* Save errno: it must be the same value in code and message */
+			saved_errno = errno;
+			g_set_error (err, rspamd_re_cache_quark (), saved_errno,
+					"cannot open file "
+					"%s: %s", path, strerror (saved_errno));
+			return FALSE;
+		}
+
+		ok = TRUE;
+		g_hash_table_iter_init (&cit, re_class->re);
+		n = g_hash_table_size (re_class->re);
+		hs_flags = g_malloc0 (sizeof (*hs_flags) * n);
+		hs_ids = g_malloc (sizeof (*hs_ids) * n);
+		hs_pats = g_malloc (sizeof (*hs_pats) * n);
+		i = 0;
+
+		while (g_hash_table_iter_next (&cit, &k, &v)) {
+			re = v;
+			pat = rspamd_regexp_get_pattern (re);
+			hs_errors = NULL;
+
+			if (hs_compile (pat,
+					HS_FLAG_ALLOWEMPTY,
+					HS_MODE_BLOCK,
+					&cache->plt,
+					&test_db,
+					&hs_errors) == HS_SUCCESS) {
+				hs_flags[i] = HS_FLAG_ALLOWEMPTY;
+			}
+			else {
+				msg_info ("cannot compile %s to hyperscan, try prefilter match",
+						pat);
+				hs_free_compile_error (hs_errors);
+				hs_errors = NULL;
+
+				if (hs_compile (pat,
+						HS_FLAG_ALLOWEMPTY | HS_FLAG_PREFILTER,
+						HS_MODE_BLOCK,
+						&cache->plt,
+						&test_db,
+						&hs_errors) != HS_SUCCESS) {
+					msg_info (
+							"cannot compile %s to hyperscan even using prefilter",
+							pat);
+					hs_free_compile_error (hs_errors);
+					/* Pattern is unusable for hyperscan, skip it */
+					continue;
+				}
+
+				hs_flags[i] = HS_FLAG_ALLOWEMPTY | HS_FLAG_PREFILTER;
+			}
+
+			/* The test database was only needed to validate the pattern */
+			hs_free_database (test_db);
+			hs_ids[i] = rspamd_regexp_get_cache_id (re);
+			hs_pats[i] = pat;
+			i ++;
+		}
+		/* Adjust real re number */
+		n = i;
+
+		if (n > 0) {
+			hs_errors = NULL;
+
+			/* Create the hs tree */
+			if (hs_compile_multi (hs_pats,
+					hs_flags,
+					hs_ids,
+					n,
+					HS_MODE_BLOCK,
+					&cache->plt,
+					&test_db,
+					&hs_errors) != HS_SUCCESS) {
+				/*
+				 * The expression index is negative when the error is not
+				 * bound to a specific pattern, so it must be checked before
+				 * it is used as an index.
+				 */
+				if (hs_errors != NULL && hs_errors->expression >= 0 &&
+						hs_errors->expression < n) {
+					pat = hs_pats[hs_errors->expression];
+				}
+				else {
+					pat = "<unknown>";
+				}
+
+				g_set_error (err, rspamd_re_cache_quark (), EINVAL,
+						"cannot create tree of regexp when processing '%s': %s",
+						pat,
+						hs_errors != NULL ? hs_errors->message : "unknown error");
+				hs_free_compile_error (hs_errors);
+				ok = FALSE;
+			}
+			else if (hs_serialize_database (test_db, &hs_serialized,
+					&serialized_len) != HS_SUCCESS) {
+				/* hyperscan does not set errno, so report a fixed code */
+				g_set_error (err,
+						rspamd_re_cache_quark (),
+						EINVAL,
+						"cannot serialize tree of regexp for %s",
+						re_class->hash);
+				hs_free_database (test_db);
+				ok = FALSE;
+			}
+			else {
+				hs_free_database (test_db);
+				written = 0;
+
+				/* Cope with short writes and interrupted calls */
+				while (written < serialized_len) {
+					r = write (fd, hs_serialized + written,
+							serialized_len - written);
+
+					if (r <= 0) {
+						if (r == -1 && errno == EINTR) {
+							continue;
+						}
+
+						saved_errno = (r == -1) ? errno : EIO;
+						g_set_error (err,
+								rspamd_re_cache_quark (),
+								saved_errno,
+								"cannot serialize tree of regexp to %s: %s",
+								path, strerror (saved_errno));
+						ok = FALSE;
+						break;
+					}
+
+					written += r;
+				}
+
+				g_free (hs_serialized);
+			}
+		}
+
+		/* Released on every path, including classes with no usable patterns */
+		g_free (hs_flags);
+		g_free (hs_ids);
+		g_free (hs_pats);
+		close (fd);
+
+		if (!ok) {
+			/*
+			 * Do not leave a partial file: it is opened with O_EXCL, so a
+			 * stale one would make every subsequent attempt fail.
+			 */
+			unlink (path);
+
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+#endif
+}
diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h
index 65a8bd392..5093689a6 100644
--- a/src/libserver/re_cache.h
+++ b/src/libserver/re_cache.h
@@ -126,4 +126,11 @@ const gchar * rspamd_re_cache_type_to_string (enum rspamd_re_type type);
*/
enum rspamd_re_type rspamd_re_cache_type_from_string (const char *str);
+/**
+ * Compile expressions to the hyperscan tree and store in the `cache_dir`
+ * @param cache regexp cache whose expressions are compiled, must not be NULL
+ * @param cache_dir directory where compiled data is stored, must not be NULL
+ * @param err filled with the failure reason when FALSE is returned
+ * @return TRUE on success, FALSE on failure or if hyperscan is disabled
+ */
+gboolean rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
+ const char *cache_dir,
+ GError **err);
+
#endif