From 2648124d978132498a36313936457a015e56b073 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 7 Jul 2018 13:25:53 +0100 Subject: [PATCH] [Project] Implement HTTP maps caching --- src/libserver/cfg_file.h | 3 +- src/libserver/cfg_rcl.c | 6 ++ src/libserver/cfg_utils.c | 1 + src/libutil/map.c | 198 +++++++++++++++++++++++++++++++++++++- src/libutil/map_private.h | 6 +- 5 files changed, 208 insertions(+), 6 deletions(-) diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index a74169b31..12919c198 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -368,7 +368,8 @@ struct rspamd_config { GList *maps; /**< maps active */ gdouble map_timeout; /**< maps watch timeout */ - gdouble map_file_watch_multiplier; /**< multiplier for watch timeout when maps are files */ + gdouble map_file_watch_multiplier; /**< multiplier for watch timeout when maps are files */ + gchar *maps_cache_dir; /**< where to save HTTP cached data */ gdouble monitored_interval; /**< interval between monitored checks */ gboolean disable_monitored; /**< disable monitoring completely */ diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index 1b93d5213..3be0a655c 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -1688,6 +1688,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections) G_STRUCT_OFFSET (struct rspamd_config, map_file_watch_multiplier), 0, "Multiplier for map watch interval when map is file"); + rspamd_rcl_add_default_handler (sub, + "maps_cache_dir", + rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct rspamd_config, maps_cache_dir), + 0, + "Directory to save maps cached data (default: $DBDIR)"); rspamd_rcl_add_default_handler (sub, "monitoring_watch_interval", rspamd_rcl_parse_struct_time, diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c index 967032777..ab01a5403 100644 --- a/src/libserver/cfg_utils.c +++ b/src/libserver/cfg_utils.c @@ -198,6 +198,7 @@ 
rspamd_config_new (enum rspamd_config_init_flags flags) #endif cfg->default_max_shots = DEFAULT_MAX_SHOTS; cfg->max_sessions_cache = DEFAULT_MAX_SESSIONS; + cfg->maps_cache_dir = rspamd_mempool_strdup (cfg->cfg_pool, RSPAMD_DBDIR); REF_INIT_RETAIN (cfg, rspamd_config_free); diff --git a/src/libutil/map.c b/src/libutil/map.c index 7faf24524..d184d32ed 100644 --- a/src/libutil/map.c +++ b/src/libutil/map.c @@ -40,12 +40,23 @@ #define MAP_RELEASE(x, t) REF_RELEASE(x) #endif -static void free_http_cbdata_common (struct http_callback_data *cbd, gboolean plan_new); +static void free_http_cbdata_common (struct http_callback_data *cbd, + gboolean plan_new); static void free_http_cbdata_dtor (gpointer p); static void free_http_cbdata (struct http_callback_data *cbd); static void rspamd_map_periodic_callback (gint fd, short what, void *ud); static void rspamd_map_schedule_periodic (struct rspamd_map *map, gboolean locked, - gboolean initial, gboolean errored); + gboolean initial, gboolean errored); +static gboolean read_map_file_chunks (struct rspamd_map *map, + struct map_cb_data *cbdata, + const gchar *fname, + gsize len, + goffset off); +static gboolean rspamd_map_save_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata, + const guchar *data, + gsize len); guint rspamd_map_log_id = (guint)-1; RSPAMD_CONSTRUCTOR(rspamd_map_log_init) @@ -735,6 +746,7 @@ read_data: rspamd_inet_address_to_string_pretty (cbd->addr), dlen, zout.pos, next_check_date); map->read_callback (out, zout.pos, &cbd->periodic->cbdata, TRUE); + rspamd_map_save_http_cached_file (map, bk, cbd->data, out, zout.pos); g_free (out); } else { @@ -742,6 +754,7 @@ read_data: cbd->bk->uri, rspamd_inet_address_to_string_pretty (cbd->addr), dlen, next_check_date); + rspamd_map_save_http_cached_file (map, bk, cbd->data, in, cbd->data_len); map->read_callback (in, cbd->data_len, &cbd->periodic->cbdata, TRUE); } @@ -1363,6 +1376,168 @@ rspamd_map_read_cached (struct 
rspamd_map *map, struct rspamd_map_backend *bk, return TRUE; } +static gboolean +rspamd_map_has_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk) +{ + gchar path[PATH_MAX]; + guchar digest[rspamd_cryptobox_HASHBYTES]; + struct rspamd_config *cfg = map->cfg; + struct stat st; + /* Purpose: returns TRUE when a cache file for this backend URI exists in cfg->maps_cache_dir and is larger than the rspamd_http_file_data header; FALSE when the directory is unset or empty, or when stat fails. */ + if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') { + return FALSE; + } + /* Cache path is maps_cache_dir + separator + digest + .map; the digest is the hash of bk->uri printed through %*xs with length 20 (presumably hex of the first 20 digest bytes - confirm against rspamd_snprintf). */ + rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0); + rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir, + G_DIR_SEPARATOR, 20, digest); + + if (stat (path, &st) != -1 && st.st_size > + sizeof (struct rspamd_http_file_data)) { + return TRUE; + } + + return FALSE; +} + /* Saves len bytes of data fetched for backend bk into its cache file: a rspamd_http_file_data header (magic, htdata->last_modified, map->next_check, payload offset) followed by the payload. Returns TRUE on success; FALSE when maps_cache_dir is unset or empty, or when opening, locking or writing fails. */ +static gboolean +rspamd_map_save_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata, + const guchar *data, + gsize len) +{ + gchar path[PATH_MAX]; + guchar digest[rspamd_cryptobox_HASHBYTES]; + struct rspamd_config *cfg = map->cfg; + gint fd; + struct rspamd_http_file_data header; + + if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') { + return FALSE; + } + + rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0); + rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir, + G_DIR_SEPARATOR, 20, digest); + /* NOTE(review): O_TRUNC is requested at open time, before the lock below is taken, and the failure paths do not remove the file - presumably a truncated or partial cache file can be left behind; verify. */ + fd = rspamd_file_xopen (path, O_WRONLY | O_TRUNC | O_CREAT, + 00600, FALSE); + + if (fd == -1) { + return FALSE; + } + + if (!rspamd_file_lock (fd, FALSE)) { + msg_err_map ("cannot lock file %s: %s", path, strerror (errno)); + close (fd); + + return FALSE; + } + /* The header is written as the raw in-memory struct; data_off records that the payload starts right after it. NOTE(review): header has no initializer, so any struct padding is written as-is. */ + memcpy (header.magic, rspamd_http_file_magic, sizeof (rspamd_http_file_magic)); + header.mtime = htdata->last_modified; + header.next_check = map->next_check; + header.data_off = sizeof (header); + + if (write (fd, &header, sizeof (header)) != sizeof (header)) { + msg_err_map ("cannot write file %s: %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + /* Now
write the rest */ + if (write (fd, data, len) != len) { + msg_err_map ("cannot write file %s: %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + rspamd_file_unlock (fd, FALSE); + close (fd); + + msg_info_map ("saved data from %s in %s, %uz bytes", bk->uri, path, len); + + return TRUE; +} + /* Loads the cached copy for backend bk: checks the header magic, restores map->next_check and htdata->last_modified from the header, then passes the payload range starting at header.data_off to read_map_file_chunks together with cbdata. Returns TRUE on success; FALSE when maps_cache_dir is unset or empty, or when the open, lock, header read, magic check or chunk read fails. */ +static gboolean +rspamd_map_read_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata, + struct map_cb_data *cbdata) +{ + gchar path[PATH_MAX]; + guchar digest[rspamd_cryptobox_HASHBYTES]; + struct rspamd_config *cfg = map->cfg; + gint fd; + struct stat st; + struct rspamd_http_file_data header; + + if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') { + return FALSE; + } + + rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0); + rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir, + G_DIR_SEPARATOR, 20, digest); + + fd = rspamd_file_xopen (path, O_RDONLY, 00600, FALSE); + + if (fd == -1) { + return FALSE; + } + + if (!rspamd_file_lock (fd, FALSE)) { + msg_err_map ("cannot lock file %s: %s", path, strerror (errno)); + close (fd); + + return FALSE; + } + /* NOTE(review): the fstat result is ignored; if the call failed, st.st_size used further down would be uninitialized. */ + (void)fstat (fd, &st); + + if (read (fd, &header, sizeof (header)) != sizeof (header)) { + msg_err_map ("cannot read file %s: %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + /* NOTE(review): a magic mismatch does not set errno, so the strerror text logged in this branch is unrelated to the failure. */ + if (memcmp (header.magic, rspamd_http_file_magic, + sizeof (rspamd_http_file_magic)) != 0) { + msg_err_map ("invalid magic in file %s: %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + rspamd_file_unlock (fd, FALSE); + close (fd); + /* The descriptor is already unlocked and closed; read_map_file_chunks below is given the path rather than the descriptor. NOTE(review): header.data_off comes from the file and is not checked against st.st_size before the subtraction below. */ + map->next_check = header.next_check; + htdata->last_modified = header.mtime; + + /* Now read file data */ + /* Perform buffered read: fail-safe */ + if (!read_map_file_chunks (map, cbdata, path, + st.st_size - header.data_off, header.data_off)) { + return FALSE; +
} + + msg_info_map ("read cached data for %s from %s, %uz bytes", bk->uri, path, + st.st_size - header.data_off); + + return TRUE; +} + /** * Async HTTP callback */ @@ -1766,6 +1941,17 @@ rspamd_map_preload (struct rspamd_config *cfg) PTR_ARRAY_FOREACH (map->backends, i, bk) { if (!(bk->protocol == MAP_PROTO_FILE || bk->protocol == MAP_PROTO_STATIC)) { + /* HTTP and HTTPS backends: without a cache file map_ok is cleared and the loop stops; with one the loop moves on to the next backend. */ + if (bk->protocol == MAP_PROTO_HTTP || + bk->protocol == MAP_PROTO_HTTPS) { + if (!rspamd_map_has_http_cached_file (map, bk)) { + map_ok = FALSE; + break; + } + else { + continue; /* Still fine so far */ + } + } map_ok = FALSE; break; } @@ -1797,6 +1983,14 @@ rspamd_map_preload (struct rspamd_config *cfg) break; } } + else if (bk->protocol == MAP_PROTO_HTTP || + bk->protocol == MAP_PROTO_HTTPS) { + if (!rspamd_map_read_http_cached_file (map, bk, bk->data.hd, + &fake_cbd.cbdata)) { + succeed = FALSE; + break; + } + } else { g_assert_not_reached (); } diff --git a/src/libutil/map_private.h b/src/libutil/map_private.h index 67b813264..55d7f0b15 100644 --- a/src/libutil/map_private.h +++ b/src/libutil/map_private.h @@ -165,11 +165,11 @@ struct map_periodic_cbdata { ref_entry_t ref; }; -static const gchar rspamd_http_cached_magic[] = +static const gchar rspamd_http_file_magic[] = {'r', 'm', 'c', 'd', '1', '0', '0', '0'}; -struct rspamd_http_cached_data { - guchar magic[sizeof (rspamd_http_cached_magic)]; +struct rspamd_http_file_data { + guchar magic[sizeof (rspamd_http_file_magic)]; goffset data_off; gulong mtime; gulong next_check; -- 2.39.5