source.dussan.org Git - rspamd.git/commitdiff
Add hyperscan compilation support
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 7 Dec 2015 17:24:55 +0000 (17:24 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 7 Dec 2015 17:24:55 +0000 (17:24 +0000)
src/libserver/re_cache.c
src/libserver/re_cache.h

index 8a6f371191c8f34b6528dd6f268f6954c0badac7..b6ee1a2c41c3317dfba16ef4abcdefbb73a26ac6 100644 (file)
@@ -32,6 +32,7 @@
 #include "libutil/util.h"
 #ifdef WITH_HYPERSCAN
 #include "hs.h"
+#include "unix-std.h"
 #endif
 
 struct rspamd_re_class {
@@ -63,6 +64,12 @@ struct rspamd_re_runtime {
        struct rspamd_re_cache *cache;
 };
 
+/**
+ * Return the GQuark registered for the "re_cache" string; it is passed as
+ * the GError domain by rspamd_re_cache_compile_hyperscan.
+ */
+static GQuark
+rspamd_re_cache_quark (void)
+{
+       GQuark domain = g_quark_from_static_string ("re_cache");
+
+       return domain;
+}
+
 static guint64
 rspamd_re_cache_class_id (enum rspamd_re_type type,
                gpointer type_data,
@@ -247,6 +254,8 @@ rspamd_re_cache_init (struct rspamd_re_cache *cache)
                features = rspamd_fstring_append (features, "AVX2", 4);
        }
 
+       hs_set_allocator (g_malloc, g_free);
+
        msg_info ("loaded hyperscan engine witch cpu tune '%s' and features '%V'",
                        platform, features);
 
@@ -621,3 +630,157 @@ rspamd_re_cache_type_from_string (const char *str)
 
        return ret;
 }
+
+/**
+ * Compile every regexp class of `cache` into a serialized hyperscan database
+ * written to `<cache_dir>/<class hash>.hs`.
+ *
+ * Each expression is first test-compiled on its own with HS_FLAG_ALLOWEMPTY;
+ * if hyperscan rejects it, the compilation is retried with HS_FLAG_PREFILTER
+ * added, and the expression is skipped when that fails as well.
+ *
+ * @param cache regexp cache, must not be NULL
+ * @param cache_dir directory for the database files, must not be NULL
+ * @param err set to an error in the "re_cache" domain when FALSE is returned
+ * @return TRUE if all classes were processed, FALSE on the first failure
+ *         (always FALSE when built without WITH_HYPERSCAN)
+ */
+gboolean
+rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
+               const char *cache_dir,
+               GError **err)
+{
+       g_assert (cache != NULL);
+       g_assert (cache_dir != NULL);
+
+#ifndef WITH_HYPERSCAN
+       g_set_error (err, rspamd_re_cache_quark (), EINVAL, "hyperscan is disabled");
+       return FALSE;
+#else
+       GHashTableIter it, cit;
+       gpointer k, v;
+       struct rspamd_re_class *re_class;
+       gchar path[PATH_MAX];
+       hs_database_t *test_db;
+       gint fd, i, n, saved_errno, *hs_ids = NULL;
+       rspamd_regexp_t *re;
+       hs_compile_error_t *hs_errors;
+       guint flags, *hs_flags = NULL;
+       const gchar *pat, **hs_pats = NULL;
+       gchar *hs_serialized;
+       gsize serialized_len;
+       gboolean ok;
+
+       g_hash_table_iter_init (&it, cache->re_classes);
+
+       while (g_hash_table_iter_next (&it, &k, &v)) {
+               re_class = v;
+               ok = TRUE;
+               rspamd_snprintf (path, sizeof (path), "%s%c%s.hs", cache_dir,
+                               G_DIR_SEPARATOR, re_class->hash);
+               fd = open (path, O_CREAT|O_TRUNC|O_EXCL|O_WRONLY, 00600);
+
+               if (fd == -1) {
+                       /* Remember errno before any other call can clobber it */
+                       saved_errno = errno;
+                       g_set_error (err, rspamd_re_cache_quark (), saved_errno,
+                                       "cannot open file %s: %s", path,
+                                       strerror (saved_errno));
+                       return FALSE;
+               }
+
+               g_hash_table_iter_init (&cit, re_class->re);
+               n = g_hash_table_size (re_class->re);
+               hs_flags = g_malloc0 (sizeof (*hs_flags) * n);
+               hs_ids = g_malloc (sizeof (*hs_ids) * n);
+               hs_pats = g_malloc (sizeof (*hs_pats) * n);
+               i = 0;
+
+               while (g_hash_table_iter_next (&cit, &k, &v)) {
+                       re = v;
+                       pat = rspamd_regexp_get_pattern (re);
+                       flags = HS_FLAG_ALLOWEMPTY;
+
+                       /* Probe the expression alone to find flags hyperscan accepts */
+                       if (hs_compile (pat,
+                                       flags,
+                                       HS_MODE_BLOCK,
+                                       &cache->plt,
+                                       &test_db,
+                                       &hs_errors) != HS_SUCCESS) {
+                               msg_info ("cannot compile %s to hyperscan, try prefilter match",
+                                               pat);
+                               hs_free_compile_error (hs_errors);
+                               flags |= HS_FLAG_PREFILTER;
+
+                               if (hs_compile (pat,
+                                               flags,
+                                               HS_MODE_BLOCK,
+                                               &cache->plt,
+                                               &test_db,
+                                               &hs_errors) != HS_SUCCESS) {
+                                       msg_info (
+                                                       "cannot compile %s to hyperscan even using prefilter",
+                                                       pat);
+                                       hs_free_compile_error (hs_errors);
+                                       /* Rejected even as a prefilter: leave it out */
+                                       continue;
+                               }
+                       }
+
+                       /* The probe database is only a check, the pattern is kept */
+                       hs_free_database (test_db);
+                       hs_flags[i] = flags;
+                       hs_ids[i] = rspamd_regexp_get_cache_id (re);
+                       hs_pats[i] = pat;
+                       i ++;
+               }
+               /* Adjust real re number */
+               n = i;
+
+               /* With n == 0 nothing is written and the created file stays empty */
+               if (n > 0) {
+                       /* Create the hs tree */
+                       if (hs_compile_multi (hs_pats,
+                                       hs_flags,
+                                       hs_ids,
+                                       n,
+                                       HS_MODE_BLOCK,
+                                       &cache->plt,
+                                       &test_db,
+                                       &hs_errors) != HS_SUCCESS) {
+                               /*
+                                * hyperscan reports a negative expression index when
+                                * the error is not tied to a single pattern, so the
+                                * index must be validated before it is used
+                                */
+                               if (hs_errors->expression >= 0 &&
+                                               hs_errors->expression < n) {
+                                       pat = hs_pats[hs_errors->expression];
+                               }
+                               else {
+                                       pat = "unknown";
+                               }
+
+                               g_set_error (err, rspamd_re_cache_quark (), EINVAL,
+                                               "cannot create tree of regexp when processing '%s': %s",
+                                               pat, hs_errors->message);
+                               hs_free_compile_error (hs_errors);
+                               ok = FALSE;
+                       }
+                       else if (hs_serialize_database (test_db, &hs_serialized,
+                                       &serialized_len) != HS_SUCCESS) {
+                               /* errno is not set by hyperscan, report EINVAL instead */
+                               g_set_error (err,
+                                               rspamd_re_cache_quark (),
+                                               EINVAL,
+                                               "cannot serialize tree of regexp for %s",
+                                               re_class->hash);
+                               hs_free_database (test_db);
+                               ok = FALSE;
+                       }
+                       else {
+                               hs_free_database (test_db);
+
+                               if (write (fd, hs_serialized, serialized_len) !=
+                                               (gssize)serialized_len) {
+                                       saved_errno = errno;
+                                       g_set_error (err,
+                                                       rspamd_re_cache_quark (),
+                                                       saved_errno,
+                                                       "cannot serialize tree of regexp to %s: %s",
+                                                       path, strerror (saved_errno));
+                                       ok = FALSE;
+                               }
+
+                               /*
+                                * The buffer is allocated by hyperscan; g_free matches the
+                                * allocator installed with hs_set_allocator in
+                                * rspamd_re_cache_init
+                                */
+                               g_free (hs_serialized);
+                       }
+               }
+
+               /* Per-class cleanup, reached on success, failure and n == 0 alike */
+               g_free (hs_flags);
+               g_free (hs_ids);
+               g_free (hs_pats);
+               close (fd);
+
+               if (!ok) {
+                       return FALSE;
+               }
+       }
+
+       return TRUE;
+#endif
+}
index 65a8bd392f613dd7897eb17ffe785efb212b607a..5093689a608dbdd7348ededcbdc65aaa695aaaaf 100644 (file)
@@ -126,4 +126,11 @@ const gchar * rspamd_re_cache_type_to_string (enum rspamd_re_type type);
  */
 enum rspamd_re_type rspamd_re_cache_type_from_string (const char *str);
 
+/**
+ * Compile expressions to the hyperscan tree and store in the `cache_dir`
+ * @param cache regexp cache whose expressions are compiled
+ * @param cache_dir directory where the compiled databases are written
+ * @param err error object, set when the function returns FALSE
+ * @return TRUE on success, FALSE on failure
+ */
+gboolean rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
+               const char *cache_dir,
+               GError **err);
+
 #endif