source.dussan.org Git - rspamd.git/commitdiff
[Project] Implement HTTP maps caching
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 7 Jul 2018 12:25:53 +0000 (13:25 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 7 Jul 2018 12:29:26 +0000 (13:29 +0100)
src/libserver/cfg_file.h
src/libserver/cfg_rcl.c
src/libserver/cfg_utils.c
src/libutil/map.c
src/libutil/map_private.h

index a74169b31bcabf78f76dc62746ec7486b41cf762..12919c198cfa6d17e44ae4727fddea361d9e3006 100644 (file)
@@ -368,7 +368,8 @@ struct rspamd_config {
 
        GList *maps;                                    /**< maps active                                                                                */
        gdouble map_timeout;                            /**< maps watch timeout                                                                 */
-       gdouble map_file_watch_multiplier;              /**< multiplier for watch timeout when maps are files */
+       gdouble map_file_watch_multiplier;              /**< multiplier for watch timeout when maps are files   */
+       gchar *maps_cache_dir;                          /**< where to save HTTP cached data                                             */
 
        gdouble monitored_interval;                     /**< interval between monitored checks                                  */
        gboolean disable_monitored;                     /**< disable monitoring completely                                              */
index 1b93d52136f995bcd974e301d4c6fae892d30a2f..3be0a655c6219c92dbaec40c20bc118524eca277 100644 (file)
@@ -1688,6 +1688,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections)
                                G_STRUCT_OFFSET (struct rspamd_config, map_file_watch_multiplier),
                                0,
                                "Multiplier for map watch interval when map is file");
+               rspamd_rcl_add_default_handler (sub,
+                               "maps_cache_dir",
+                               rspamd_rcl_parse_struct_string,
+                               G_STRUCT_OFFSET (struct rspamd_config, maps_cache_dir),
+                               0,
+                               "Directory to save maps cached data (default: $DBDIR)");
                rspamd_rcl_add_default_handler (sub,
                                "monitoring_watch_interval",
                                rspamd_rcl_parse_struct_time,
index 96703277701114d4ab1b2ef8c61dfc5a60507d10..ab01a54033b57d8ad9468f19953c9cb511560e4a 100644 (file)
@@ -198,6 +198,7 @@ rspamd_config_new (enum rspamd_config_init_flags flags)
 #endif
        cfg->default_max_shots = DEFAULT_MAX_SHOTS;
        cfg->max_sessions_cache = DEFAULT_MAX_SESSIONS;
+       cfg->maps_cache_dir = rspamd_mempool_strdup (cfg->cfg_pool, RSPAMD_DBDIR);
 
        REF_INIT_RETAIN (cfg, rspamd_config_free);
 
index 7faf24524b4dd1a0f23dc41f5bad105944b01ffa..d184d32edf1d0f2d75d7f0145a9e10f40678f2c4 100644 (file)
 #define MAP_RELEASE(x, t) REF_RELEASE(x)
 #endif
 
-static void free_http_cbdata_common (struct http_callback_data *cbd, gboolean plan_new);
+static void free_http_cbdata_common (struct http_callback_data *cbd,
+                                                                        gboolean plan_new);
 static void free_http_cbdata_dtor (gpointer p);
 static void free_http_cbdata (struct http_callback_data *cbd);
 static void rspamd_map_periodic_callback (gint fd, short what, void *ud);
 static void rspamd_map_schedule_periodic (struct rspamd_map *map, gboolean locked,
-               gboolean initial, gboolean errored);
+                                                                                 gboolean initial, gboolean errored);
+static gboolean read_map_file_chunks (struct rspamd_map *map,
+                                                                         struct map_cb_data *cbdata,
+                                                                         const gchar *fname,
+                                                                         gsize len,
+                                                                         goffset off);
+static gboolean rspamd_map_save_http_cached_file (struct rspamd_map *map,
+                                                                                                 struct rspamd_map_backend *bk,
+                                                                                                 struct http_map_data *htdata,
+                                                                                                 const guchar *data,
+                                                                                                 gsize len);
 
 guint rspamd_map_log_id = (guint)-1;
 RSPAMD_CONSTRUCTOR(rspamd_map_log_init)
@@ -735,6 +746,7 @@ read_data:
                                        rspamd_inet_address_to_string_pretty (cbd->addr),
                                        dlen, zout.pos, next_check_date);
                        map->read_callback (out, zout.pos, &cbd->periodic->cbdata, TRUE);
+                       rspamd_map_save_http_cached_file (map, bk, cbd->data, out, zout.pos);
                        g_free (out);
                }
                else {
@@ -742,6 +754,7 @@ read_data:
                                        cbd->bk->uri,
                                        rspamd_inet_address_to_string_pretty (cbd->addr),
                                        dlen, next_check_date);
+                       rspamd_map_save_http_cached_file (map, bk, cbd->data, in, cbd->data_len);
                        map->read_callback (in, cbd->data_len, &cbd->periodic->cbdata, TRUE);
                }
 
@@ -1363,6 +1376,168 @@ rspamd_map_read_cached (struct rspamd_map *map, struct rspamd_map_backend *bk,
        return TRUE;
 }
 
+/**
+ * Tell whether a usable cache file exists on disk for an HTTP map backend.
+ *
+ * The cache file name is built from a hash of the backend URI and is looked
+ * up inside cfg->maps_cache_dir. A file counts as usable only when it is
+ * strictly larger than struct rspamd_http_file_data, i.e. it holds at least
+ * one byte after the header.
+ *
+ * @param map map owning the backend (source of the configuration)
+ * @param bk backend whose URI identifies the cache file
+ * @return TRUE if such a file exists; FALSE otherwise, including when no
+ *         cache directory is configured or stat() fails
+ */
+static gboolean
+rspamd_map_has_http_cached_file (struct rspamd_map *map,
+                                 struct rspamd_map_backend *bk)
+{
+       struct rspamd_config *cfg = map->cfg;
+       const gchar *cache_dir = cfg->maps_cache_dir;
+       guchar uri_hash[rspamd_cryptobox_HASHBYTES];
+       gchar cache_path[PATH_MAX];
+       struct stat sb;
+
+       /* Nothing can be cached when no directory is configured */
+       if (cache_dir == NULL || cache_dir[0] == '\0') {
+               return FALSE;
+       }
+
+       rspamd_cryptobox_hash (uri_hash, bk->uri, strlen (bk->uri), NULL, 0);
+       rspamd_snprintf (cache_path, sizeof (cache_path), "%s%c%*xs.map",
+                       cache_dir, G_DIR_SEPARATOR, 20, uri_hash);
+
+       if (stat (cache_path, &sb) == -1) {
+               return FALSE;
+       }
+
+       /* A header-only (or shorter) file carries no map data */
+       if (sb.st_size <= sizeof (struct rspamd_http_file_data)) {
+               return FALSE;
+       }
+
+       return TRUE;
+}
+
+/**
+ * Save data fetched for an HTTP map backend into its on-disk cache file.
+ *
+ * The file is placed in cfg->maps_cache_dir and named after a hash of the
+ * backend URI. It starts with a struct rspamd_http_file_data header (magic,
+ * last modification time, next check time, payload offset) followed by the
+ * raw map data. If the header or the payload cannot be written completely,
+ * the partially written file is removed so that it is never mistaken for a
+ * valid cache entry later on.
+ *
+ * @param map map owning the backend (provides the config and next_check)
+ * @param bk backend whose URI identifies the cache file
+ * @param htdata HTTP state of the backend; last_modified goes to the header
+ * @param data payload to store
+ * @param len payload length in bytes
+ * @return TRUE if header and payload were fully written; FALSE otherwise,
+ *         including when no cache directory is configured
+ */
+static gboolean
+rspamd_map_save_http_cached_file (struct rspamd_map *map,
+                                  struct rspamd_map_backend *bk,
+                                  struct http_map_data *htdata,
+                                  const guchar *data,
+                                  gsize len)
+{
+       gchar path[PATH_MAX];
+       guchar digest[rspamd_cryptobox_HASHBYTES];
+       struct rspamd_config *cfg = map->cfg;
+       gint fd;
+       struct rspamd_http_file_data header;
+
+       if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') {
+               return FALSE;
+       }
+
+       rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0);
+       rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir,
+                       G_DIR_SEPARATOR, 20, digest);
+
+       fd = rspamd_file_xopen (path, O_WRONLY | O_TRUNC | O_CREAT,
+                       00600, FALSE);
+
+       if (fd == -1) {
+               return FALSE;
+       }
+
+       if (!rspamd_file_lock (fd, FALSE)) {
+               msg_err_map ("cannot lock file %s: %s", path, strerror (errno));
+               close (fd);
+
+               return FALSE;
+       }
+
+       /* Zero the header first so no uninitialised bytes reach the disk */
+       memset (&header, 0, sizeof (header));
+       memcpy (header.magic, rspamd_http_file_magic, sizeof (rspamd_http_file_magic));
+       header.mtime = htdata->last_modified;
+       header.next_check = map->next_check;
+       header.data_off = sizeof (header);
+
+       /* Header goes first, the payload follows immediately */
+       if (write (fd, &header, sizeof (header)) != (gssize)sizeof (header) ||
+                       write (fd, data, len) != (gssize)len) {
+               msg_err_map ("cannot write file %s: %s", path, strerror (errno));
+               /*
+                * A valid header followed by truncated data would be accepted
+                * as a cache entry on the next start, so drop the file.
+                */
+               (void)unlink (path);
+               rspamd_file_unlock (fd, FALSE);
+               close (fd);
+
+               return FALSE;
+       }
+
+       rspamd_file_unlock (fd, FALSE);
+       close (fd);
+
+       msg_info_map ("saved data from %s in %s, %uz bytes", bk->uri, path, len);
+
+       return TRUE;
+}
+
+/**
+ * Load map data for an HTTP backend from its on-disk cache file.
+ *
+ * The file is looked up in cfg->maps_cache_dir under a name built from a hash
+ * of the backend URI. Its struct rspamd_http_file_data header is read and
+ * validated (magic and payload offset), then map->next_check and
+ * htdata->last_modified are restored from it and the payload is passed to
+ * read_map_file_chunks().
+ *
+ * @param map map owning the backend; next_check is updated from the header
+ * @param bk backend whose URI identifies the cache file
+ * @param htdata HTTP state of the backend; last_modified is updated
+ * @param cbdata callback data handed over to read_map_file_chunks()
+ * @return TRUE if the cached data was read; FALSE if caching is disabled,
+ *         the file is missing, unreadable or malformed, or reading the
+ *         payload failed
+ */
+static gboolean
+rspamd_map_read_http_cached_file (struct rspamd_map *map,
+                                  struct rspamd_map_backend *bk,
+                                  struct http_map_data *htdata,
+                                  struct map_cb_data *cbdata)
+{
+       gchar path[PATH_MAX];
+       guchar digest[rspamd_cryptobox_HASHBYTES];
+       struct rspamd_config *cfg = map->cfg;
+       gint fd;
+       gsize data_len;
+       struct stat st;
+       struct rspamd_http_file_data header;
+
+       if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') {
+               return FALSE;
+       }
+
+       rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0);
+       rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir,
+                       G_DIR_SEPARATOR, 20, digest);
+
+       fd = rspamd_file_xopen (path, O_RDONLY, 00600, FALSE);
+
+       if (fd == -1) {
+               return FALSE;
+       }
+
+       if (!rspamd_file_lock (fd, FALSE)) {
+               msg_err_map ("cannot lock file %s: %s", path, strerror (errno));
+               close (fd);
+
+               return FALSE;
+       }
+
+       /* The file size is needed below to compute the payload length */
+       if (fstat (fd, &st) == -1) {
+               msg_err_map ("cannot stat file %s: %s", path, strerror (errno));
+               rspamd_file_unlock (fd, FALSE);
+               close (fd);
+
+               return FALSE;
+       }
+
+       if (read (fd, &header, sizeof (header)) != sizeof (header)) {
+               msg_err_map ("cannot read file %s: %s", path, strerror (errno));
+               rspamd_file_unlock (fd, FALSE);
+               close (fd);
+
+               return FALSE;
+       }
+
+       if (memcmp (header.magic, rspamd_http_file_magic,
+                       sizeof (rspamd_http_file_magic)) != 0) {
+               /* No system call failed here, so errno is not reported */
+               msg_err_map ("invalid magic in file %s", path);
+               rspamd_file_unlock (fd, FALSE);
+               close (fd);
+
+               return FALSE;
+       }
+
+       /*
+        * The offset comes from disk and must not be trusted: the payload has
+        * to start after the header and not beyond the end of the file,
+        * otherwise the length computed below would be negative.
+        */
+       if (header.data_off < (goffset)sizeof (header) ||
+                       header.data_off > (goffset)st.st_size) {
+               msg_err_map ("invalid data offset in file %s", path);
+               rspamd_file_unlock (fd, FALSE);
+               close (fd);
+
+               return FALSE;
+       }
+
+       rspamd_file_unlock (fd, FALSE);
+       close (fd);
+
+       data_len = (gsize)(st.st_size - header.data_off);
+       map->next_check = header.next_check;
+       htdata->last_modified = header.mtime;
+
+       /* Now read file data */
+       /* Perform buffered read: fail-safe */
+       if (!read_map_file_chunks (map, cbdata, path,
+                       data_len, header.data_off)) {
+               return FALSE;
+       }
+
+       msg_info_map ("read cached data for %s from %s, %uz bytes", bk->uri, path,
+                       data_len);
+
+       return TRUE;
+}
+
 /**
  * Async HTTP callback
  */
@@ -1766,6 +1941,17 @@ rspamd_map_preload (struct rspamd_config *cfg)
                PTR_ARRAY_FOREACH (map->backends, i, bk) {
                        if (!(bk->protocol == MAP_PROTO_FILE ||
                                  bk->protocol == MAP_PROTO_STATIC)) {
+
+                               if (bk->protocol == MAP_PROTO_HTTP ||
+                                               bk->protocol == MAP_PROTO_HTTPS) {
+                                       if (!rspamd_map_has_http_cached_file (map, bk)) {
+                                               map_ok = FALSE;
+                                               break;
+                                       }
+                                       else {
+                                               continue; /* We are yet fine */
+                                       }
+                               }
                                map_ok = FALSE;
                                break;
                        }
@@ -1797,6 +1983,14 @@ rspamd_map_preload (struct rspamd_config *cfg)
                                                break;
                                        }
                                }
+                               else if (bk->protocol == MAP_PROTO_HTTP ||
+                                                bk->protocol == MAP_PROTO_HTTPS) {
+                                       if (!rspamd_map_read_http_cached_file (map, bk, bk->data.hd,
+                                                       &fake_cbd.cbdata)) {
+                                               succeed = FALSE;
+                                               break;
+                                       }
+                               }
                                else {
                                        g_assert_not_reached ();
                                }
index 67b813264ac3a4d6ce0cff6c33282ecde5fc7483..55d7f0b158b438c0b59a5d81ff2d7a6720efc0ec 100644 (file)
@@ -165,11 +165,11 @@ struct map_periodic_cbdata {
        ref_entry_t ref;
 };
 
-static const gchar rspamd_http_cached_magic[] =
+static const gchar rspamd_http_file_magic[] =
                {'r', 'm', 'c', 'd', '1', '0', '0', '0'};
 
-struct rspamd_http_cached_data {
-       guchar magic[sizeof (rspamd_http_cached_magic)];
+struct rspamd_http_file_data {
+       guchar magic[sizeof (rspamd_http_file_magic)];
        goffset data_off;
        gulong mtime;
        gulong next_check;