]> source.dussan.org Git - rspamd.git/commitdiff
[Rework] Add preliminary support of hyperscan caching for re maps
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 24 Nov 2020 16:07:45 +0000 (16:07 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 25 Nov 2020 11:12:21 +0000 (11:12 +0000)
src/libserver/maps/map_helpers.c
src/libserver/mempool_vars_internal.h

index 7eacdf61ac32e0dc7c0f24be988c75c3821aecbb..0848065731a559ca15c24d2d882be26d437b85bc 100644 (file)
@@ -20,6 +20,7 @@
 #include "radix.h"
 #include "rspamd.h"
 #include "cryptobox.h"
+#include "mempool_vars_internal.h"
 #include "contrib/fastutf8/fastutf8.h"
 #include "contrib/cdb/cdb.h"
 
@@ -1029,6 +1030,120 @@ rspamd_radix_dtor (struct map_cb_data *data)
        }
 }
 
+#ifdef WITH_HYPERSCAN
+
+/*
+ * Remembers `fname` in the per-config table of hyperscan cache files
+ * used by regexp maps.
+ *
+ * The table lives in the config memory pool under the
+ * RSPAMD_MEMPOOL_RE_MAPS_CACHE variable. It is created on first use and
+ * registered with g_hash_table_unref as the pool variable destructor.
+ * Keys are private copies of `fname` (released with g_free); the value is
+ * a constant placeholder string, so the table is effectively a set.
+ */
+static void
+rspamd_re_map_cache_update (const gchar *fname, struct rspamd_config *cfg)
+{
+       GHashTable *known_files = rspamd_mempool_get_variable (cfg->cfg_pool,
+                       RSPAMD_MEMPOOL_RE_MAPS_CACHE);
+
+       if (known_files == NULL) {
+               /* Nothing registered for this config yet: create the table lazily */
+               known_files = g_hash_table_new_full (g_str_hash, g_str_equal,
+                               g_free, NULL);
+               rspamd_mempool_set_variable (cfg->cfg_pool,
+                               RSPAMD_MEMPOOL_RE_MAPS_CACHE,
+                               known_files, (rspamd_mempool_destruct_t)g_hash_table_unref);
+       }
+
+       /* The table owns the duplicated key; the value is never freed */
+       g_hash_table_insert (known_files, g_strdup (fname), "1");
+}
+
+/*
+ * Tries to restore the compiled hyperscan database of `re_map` from the
+ * on-disk cache instead of recompiling it.
+ *
+ * The cache file is looked up as "<hs_cache_dir>/<digest>.hsmc", where the
+ * digest part is formatted from re_map->re_digest.
+ *
+ * Returns TRUE when the file was mapped and deserialized into
+ * re_map->hs_db (the file name is then registered through
+ * rspamd_re_map_cache_update). Returns FALSE when no cache directory is
+ * configured, the file cannot be mapped, or deserialization fails; in the
+ * last case the file is removed.
+ */
+static gboolean
+rspamd_try_load_re_map_cache (struct rspamd_regexp_map_helper *re_map)
+{
+       gchar cache_path[PATH_MAX];
+       gpointer mapped;
+       gsize mapped_len;
+       gboolean loaded;
+
+       if (re_map->map->cfg->hs_cache_dir == NULL) {
+               return FALSE;
+       }
+
+       rspamd_snprintf (cache_path, sizeof (cache_path), "%s/%*xs.hsmc",
+                       re_map->map->cfg->hs_cache_dir,
+                       (gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+       mapped = rspamd_file_xmap (cache_path, PROT_READ, &mapped_len, TRUE);
+
+       if (mapped == NULL) {
+               /* No usable cache file for this digest */
+               return FALSE;
+       }
+
+       loaded = (hs_deserialize_database (mapped, mapped_len,
+                       &re_map->hs_db) == HS_SUCCESS);
+
+       if (loaded) {
+               rspamd_re_map_cache_update (cache_path, re_map->map->cfg);
+       }
+
+       /* The mapping is not needed once deserialization has been attempted */
+       munmap (mapped, mapped_len);
+
+       if (!loaded) {
+               /* Remove stale file */
+               (void)unlink (cache_path);
+       }
+
+       return loaded;
+}
+
+/*
+ * Serializes the compiled hyperscan database of `re_map` into the on-disk
+ * cache so that it can later be picked up by
+ * rspamd_try_load_re_map_cache.
+ *
+ * The data is first written to "<hs_cache_dir>/<digest>.hsmc.tmp" (opened
+ * with O_CREAT | O_EXCL) and then renamed to "<hs_cache_dir>/<digest>.hsmc",
+ * so a partially written file never appears under the final name. On any
+ * failure after the temporary file has been created, it is removed.
+ *
+ * Returns TRUE when the cache file has been written, renamed into place
+ * and registered through rspamd_re_map_cache_update. Returns FALSE when
+ * no cache directory is configured, the temporary file cannot be created,
+ * or serialization, writing or renaming fails.
+ */
+static gboolean
+rspamd_try_save_re_map_cache (struct rspamd_regexp_map_helper *re_map)
+{
+       gchar fp[PATH_MAX], np[PATH_MAX];
+       gsize len = 0, written = 0;
+       gssize r;
+       gint fd, hs_ret;
+       char *bytes = NULL;
+       struct rspamd_map *map;
+       gboolean ret = FALSE;
+
+       /* `map` is also referenced by the msg_*_map logging macros */
+       map = re_map->map;
+
+       if (!map->cfg->hs_cache_dir) {
+               return FALSE;
+       }
+
+       rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmc.tmp",
+                       map->cfg->hs_cache_dir,
+                       (gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+       fd = rspamd_file_xopen (fp, O_WRONLY | O_CREAT | O_EXCL, 00644, 0);
+
+       if (fd == -1) {
+               return FALSE;
+       }
+
+       hs_ret = hs_serialize_database (re_map->hs_db, &bytes, &len);
+
+       if (hs_ret != HS_SUCCESS) {
+               /* Hyperscan reports failures via its return code, not errno */
+               msg_warn_map ("cannot serialize hyperscan cache to %s: hyperscan error %d",
+                               fp, hs_ret);
+               unlink (fp);
+               close (fd);
+
+               return FALSE;
+       }
+
+       /* write() may be short or interrupted: loop until all data is stored */
+       while (written < len) {
+               r = write (fd, bytes + written, len - written);
+
+               if (r == -1 && errno == EINTR) {
+                       continue;
+               }
+
+               if (r <= 0) {
+                       if (r == 0) {
+                               /* No progress and no errno: report a generic I/O error */
+                               errno = EIO;
+                       }
+
+                       break;
+               }
+
+               written += r;
+       }
+
+       if (written < len) {
+               /* Log before any call that could clobber errno */
+               msg_warn_map ("cannot write hyperscan cache to %s: %s",
+                               fp, strerror (errno));
+               unlink (fp);
+               free (bytes);
+               close (fd);
+
+               return FALSE;
+       }
+
+       free (bytes);
+       fsync (fd);
+
+       rspamd_snprintf (np, sizeof (np), "%s/%*xs.hsmc",
+                       map->cfg->hs_cache_dir,
+                       (gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+       if (rename (fp, np) == -1) {
+               msg_warn_map ("cannot rename hyperscan cache from %s to %s: %s",
+                               fp, np, strerror (errno));
+               unlink (fp);
+       }
+       else {
+               msg_info_map ("written cached hyperscan data for %s to %s",
+                               map->name, np);
+
+               rspamd_re_map_cache_update (np, map->cfg);
+               ret = TRUE;
+       }
+
+       close (fd);
+
+       return ret;
+}
+#endif
+
 static void
 rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
 {
@@ -1106,25 +1221,36 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
        }
 
        if (re_map->regexps->len > 0 && re_map->patterns) {
-               gdouble ts1 = rspamd_get_ticks (FALSE);
-
-               if (hs_compile_multi ((const gchar **)re_map->patterns,
-                               re_map->flags,
-                               re_map->ids,
-                               re_map->regexps->len,
-                               HS_MODE_BLOCK,
-                               &plt,
-                               &re_map->hs_db,
-                               &err) != HS_SUCCESS) {
-
-                       msg_err_map ("cannot create tree of regexp when processing '%s': %s",
-                                       err->expression >= 0 ?
-                                                       re_map->patterns[err->expression] :
-                                                       "unknown regexp", err->message);
-                       re_map->hs_db = NULL;
-                       hs_free_compile_error (err);
 
-                       return;
+               if (!rspamd_try_load_re_map_cache (re_map)) {
+                       gdouble ts1 = rspamd_get_ticks (FALSE);
+
+                       if (hs_compile_multi ((const gchar **) re_map->patterns,
+                                       re_map->flags,
+                                       re_map->ids,
+                                       re_map->regexps->len,
+                                       HS_MODE_BLOCK,
+                                       &plt,
+                                       &re_map->hs_db,
+                                       &err) != HS_SUCCESS) {
+
+                               msg_err_map ("cannot create tree of regexp when processing '%s': %s",
+                                               err->expression >= 0 ?
+                                               re_map->patterns[err->expression] :
+                                               "unknown regexp", err->message);
+                               re_map->hs_db = NULL;
+                               hs_free_compile_error (err);
+
+                               return;
+                       }
+
+                       ts1 = (rspamd_get_ticks (FALSE) - ts1) * 1000.0;
+                       msg_info_map ("hyperscan compiled %d regular expressions from %s in %.1f ms",
+                                       re_map->regexps->len, re_map->map->name, ts1);
+               }
+               else {
+                       msg_info_map ("hyperscan read %d cached regular expressions from %s",
+                                       re_map->regexps->len, re_map->map->name);
                }
 
                if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) {
@@ -1132,10 +1258,6 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
                        hs_free_database (re_map->hs_db);
                        re_map->hs_db = NULL;
                }
-
-               ts1 = (rspamd_get_ticks (FALSE) - ts1) * 1000.0;
-               msg_info_map ("hyperscan compiled %d regular expressions from %s in %.1f ms",
-                               re_map->regexps->len, re_map->map->name, ts1);
        }
        else {
                msg_err_map ("regexp map is empty");
index 576635a9b44c885f3814c653793dc4d80cd5ccc0..6b68dd5a55d8ae18d344e980bfa95ecc5ec59098 100644 (file)
@@ -40,5 +40,6 @@
 #define RSPAMD_MEMPOOL_FUZZY_RESULT "fuzzy_hashes"
 #define RSPAMD_MEMPOOL_SPAM_LEARNS "spam_learns"
 #define RSPAMD_MEMPOOL_HAM_LEARNS "ham_learns"
+#define RSPAMD_MEMPOOL_RE_MAPS_CACHE "re_maps_cache"
 
 #endif