diff options
Diffstat (limited to 'src/libserver')
31 files changed, 12280 insertions, 33 deletions
diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt index 4b999c900..635c65b13 100644 --- a/src/libserver/CMakeLists.txt +++ b/src/libserver/CMakeLists.txt @@ -18,6 +18,7 @@ SET(LIBRSPAMDSERVERSRC ${CMAKE_CURRENT_SOURCE_DIR}/redis_pool.c ${CMAKE_CURRENT_SOURCE_DIR}/roll_history.c ${CMAKE_CURRENT_SOURCE_DIR}/spf.c + ${CMAKE_CURRENT_SOURCE_DIR}/ssl_util.c ${CMAKE_CURRENT_SOURCE_DIR}/rspamd_symcache.c ${CMAKE_CURRENT_SOURCE_DIR}/task.c ${CMAKE_CURRENT_SOURCE_DIR}/url.c @@ -25,7 +26,14 @@ SET(LIBRSPAMDSERVERSRC ${CMAKE_CURRENT_SOURCE_DIR}/logger/logger.c ${CMAKE_CURRENT_SOURCE_DIR}/logger/logger_file.c ${CMAKE_CURRENT_SOURCE_DIR}/logger/logger_syslog.c - ${CMAKE_CURRENT_SOURCE_DIR}/logger/logger_console.c) + ${CMAKE_CURRENT_SOURCE_DIR}/logger/logger_console.c + ${CMAKE_CURRENT_SOURCE_DIR}/http/http_util.c + ${CMAKE_CURRENT_SOURCE_DIR}/http/http_message.c + ${CMAKE_CURRENT_SOURCE_DIR}/http/http_connection.c + ${CMAKE_CURRENT_SOURCE_DIR}/http/http_router.c + ${CMAKE_CURRENT_SOURCE_DIR}/http/http_context.c + ${CMAKE_CURRENT_SOURCE_DIR}/maps/map.c + ${CMAKE_CURRENT_SOURCE_DIR}/maps/map_helpers.c) # Librspamd-server SET(RSPAMD_SERVER ${LIBRSPAMDSERVERSRC} PARENT_SCOPE) diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index cf1532692..4a8ab5bfc 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -821,6 +821,36 @@ struct rspamd_action *rspamd_config_get_action_by_type (struct rspamd_config *cf int rspamd_config_ev_backend_get (struct rspamd_config *cfg); const gchar * rspamd_config_ev_backend_to_string (int ev_backend, gboolean *effective); +struct rspamd_external_libs_ctx; + +/** + * Initialize rspamd libraries + */ +struct rspamd_external_libs_ctx *rspamd_init_libs (void); + +/** + * Reset and initialize decompressor + * @param ctx + */ +gboolean rspamd_libs_reset_decompression (struct rspamd_external_libs_ctx *ctx); + +/** + * Reset and initialize compressor + * @param ctx + */ +gboolean rspamd_libs_reset_compression 
(struct rspamd_external_libs_ctx *ctx); + +/** + * Destroy external libraries context + */ +void rspamd_deinit_libs (struct rspamd_external_libs_ctx *ctx); + +/** + * Configure libraries + */ +gboolean rspamd_config_libs (struct rspamd_external_libs_ctx *ctx, + struct rspamd_config *cfg); + #define msg_err_config(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ cfg->cfg_pool->tag.tagname, cfg->checksum, \ G_STRFUNC, \ diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c index cd85b73ac..961a2610e 100644 --- a/src/libserver/cfg_utils.c +++ b/src/libserver/cfg_utils.c @@ -21,9 +21,9 @@ #include "scan_result.h" #include "lua/lua_common.h" #include "lua/lua_thread_pool.h" -#include "map.h" -#include "map_helpers.h" -#include "map_private.h" +#include "maps/map.h" +#include "maps/map_helpers.h" +#include "maps/map_private.h" #include "dynamic_cfg.h" #include "utlist.h" #include "stat_api.h" @@ -31,6 +31,26 @@ #include "libutil/multipattern.h" #include "monitored.h" #include "ref.h" +#include "cryptobox.h" +#include "ssl_util.h" +#include "contrib/libottery/ottery.h" +#include "contrib/fastutf8/fastutf8.h" + +#define ZSTD_STATIC_LINKING_ONLY +#include "contrib/zstd/zstd.h" +#include "contrib/zstd/zdict.h" + +#ifdef HAVE_OPENSSL +#include <openssl/rand.h> +#include <openssl/err.h> +#include <openssl/evp.h> +#include <openssl/ssl.h> +#include <openssl/conf.h> +#include <openssl/engine.h> +#endif +#ifdef HAVE_LOCALE_H +#include <locale.h> +#endif #include <math.h> #define DEFAULT_SCORE 10.0 @@ -2614,4 +2634,357 @@ rspamd_config_ev_backend_to_string (int ev_backend, gboolean *effective) SET_EFFECTIVE (FALSE); return "unknown"; #undef SET_EFFECTIVE +} + +static void +rspamd_openssl_maybe_init (void) +{ + static gboolean openssl_initialized = FALSE; + + if (!openssl_initialized) { + ERR_load_crypto_strings (); + SSL_load_error_strings (); + + OpenSSL_add_all_algorithms (); + OpenSSL_add_all_digests (); + OpenSSL_add_all_ciphers (); + +#if 
OPENSSL_VERSION_NUMBER >= 0x1000104fL && !defined(LIBRESSL_VERSION_NUMBER) + ENGINE_load_builtin_engines (); +#endif +#if OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER) + SSL_library_init (); +#else + OPENSSL_init_ssl (0, NULL); +#endif + +#if OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER) + OPENSSL_config (NULL); +#endif + if (RAND_status () == 0) { + guchar seed[128]; + + /* Try to use ottery to seed rand */ + ottery_rand_bytes (seed, sizeof (seed)); + RAND_seed (seed, sizeof (seed)); + rspamd_explicit_memzero (seed, sizeof (seed)); + } + + openssl_initialized = TRUE; + } +} + +struct rspamd_external_libs_ctx * +rspamd_init_libs (void) +{ + struct rlimit rlim; + struct rspamd_external_libs_ctx *ctx; + struct ottery_config *ottery_cfg; + + ctx = g_malloc0 (sizeof (*ctx)); + ctx->crypto_ctx = rspamd_cryptobox_init (); + ottery_cfg = g_malloc0 (ottery_get_sizeof_config ()); + ottery_config_init (ottery_cfg); + ctx->ottery_cfg = ottery_cfg; + + rspamd_openssl_maybe_init (); + + /* Check if we have rdrand */ + if ((ctx->crypto_ctx->cpu_config & CPUID_RDRAND) == 0) { + ottery_config_disable_entropy_sources (ottery_cfg, + OTTERY_ENTROPY_SRC_RDRAND); +#if OPENSSL_VERSION_NUMBER >= 0x1000104fL && !defined(LIBRESSL_VERSION_NUMBER) + RAND_set_rand_engine (NULL); +#endif + } + + /* Configure utf8 library */ + guint utf8_flags = 0; + + if ((ctx->crypto_ctx->cpu_config & CPUID_SSE41)) { + utf8_flags |= RSPAMD_FAST_UTF8_FLAG_SSE41; + } + if ((ctx->crypto_ctx->cpu_config & CPUID_AVX2)) { + utf8_flags |= RSPAMD_FAST_UTF8_FLAG_AVX2; + } + + rspamd_fast_utf8_library_init (utf8_flags); + + g_assert (ottery_init (ottery_cfg) == 0); + +#ifdef HAVE_LOCALE_H + if (getenv ("LANG") == NULL) { + setlocale (LC_ALL, "C"); + setlocale (LC_CTYPE, "C"); + setlocale (LC_MESSAGES, "C"); + setlocale (LC_TIME, "C"); + } + else { + /* Just set the default locale */ + setlocale (LC_ALL, ""); + /* But for some issues we still want C locale */ + 
setlocale (LC_NUMERIC, "C"); + } +#endif + + ctx->ssl_ctx = rspamd_init_ssl_ctx (); + ctx->ssl_ctx_noverify = rspamd_init_ssl_ctx_noverify (); + rspamd_random_seed_fast (); + + /* Set stack size for pcre */ + getrlimit (RLIMIT_STACK, &rlim); + rlim.rlim_cur = 100 * 1024 * 1024; + rlim.rlim_max = rlim.rlim_cur; + setrlimit (RLIMIT_STACK, &rlim); + + ctx->local_addrs = rspamd_inet_library_init (); + REF_INIT_RETAIN (ctx, rspamd_deinit_libs); + + return ctx; +} + +static struct zstd_dictionary * +rspamd_open_zstd_dictionary (const char *path) +{ + struct zstd_dictionary *dict; + + dict = g_malloc0 (sizeof (*dict)); + dict->dict = rspamd_file_xmap (path, PROT_READ, &dict->size, TRUE); + + if (dict->dict == NULL) { + g_free (dict); + + return NULL; + } + + dict->id = ZDICT_getDictID (dict->dict, dict->size); + + if (dict->id == 0) { + g_free (dict); + + return NULL; + } + + return dict; +} + +static void +rspamd_free_zstd_dictionary (struct zstd_dictionary *dict) +{ + if (dict) { + munmap (dict->dict, dict->size); + g_free (dict); + } +} + +gboolean +rspamd_config_libs (struct rspamd_external_libs_ctx *ctx, + struct rspamd_config *cfg) +{ + static const char secure_ciphers[] = "HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4"; + size_t r; + gboolean ret = TRUE; + + g_assert (cfg != NULL); + + if (ctx != NULL) { + if (cfg->local_addrs) { + rspamd_config_radix_from_ucl (cfg, cfg->local_addrs, + "Local addresses", + ctx->local_addrs, + NULL, + NULL); + } + + rspamd_free_zstd_dictionary (ctx->in_dict); + rspamd_free_zstd_dictionary (ctx->out_dict); + + if (ctx->out_zstream) { + ZSTD_freeCStream (ctx->out_zstream); + ctx->out_zstream = NULL; + } + + if (ctx->in_zstream) { + ZSTD_freeDStream (ctx->in_zstream); + ctx->in_zstream = NULL; + } + + if (cfg->zstd_input_dictionary) { + ctx->in_dict = rspamd_open_zstd_dictionary ( + cfg->zstd_input_dictionary); + + if (ctx->in_dict == NULL) { + msg_err_config ("cannot open zstd dictionary in %s", + cfg->zstd_input_dictionary); + } + } + if 
(cfg->zstd_output_dictionary) { + ctx->out_dict = rspamd_open_zstd_dictionary ( + cfg->zstd_output_dictionary); + + if (ctx->out_dict == NULL) { + msg_err_config ("cannot open zstd dictionary in %s", + cfg->zstd_output_dictionary); + } + } + + if (cfg->fips_mode) { +#ifdef HAVE_FIPS_MODE + int mode = FIPS_mode (); + unsigned long err = (unsigned long)-1; + + /* Toggle FIPS mode */ + if (mode == 0) { + if (FIPS_mode_set (1) != 1) { + err = ERR_get_error (); + } + } + else { + msg_info_config ("OpenSSL FIPS mode is already enabled"); + } + + if (err != (unsigned long)-1) { + msg_err_config ("FIPS_mode_set failed: %s", + ERR_error_string (err, NULL)); + ret = FALSE; + } + else { + msg_info_config ("OpenSSL FIPS mode is enabled"); + } +#else + msg_warn_config ("SSL FIPS mode is enabled but not supported by OpenSSL library!"); +#endif + } + + if (cfg->ssl_ca_path) { + if (SSL_CTX_load_verify_locations (ctx->ssl_ctx, cfg->ssl_ca_path, + NULL) != 1) { + msg_err_config ("cannot load CA certs from %s: %s", + cfg->ssl_ca_path, + ERR_error_string (ERR_get_error (), NULL)); + } + } + else { + msg_debug_config ("ssl_ca_path is not set, using default CA path"); + SSL_CTX_set_default_verify_paths (ctx->ssl_ctx); + } + + if (cfg->ssl_ciphers) { + if (SSL_CTX_set_cipher_list (ctx->ssl_ctx, cfg->ssl_ciphers) != 1) { + msg_err_config ( + "cannot set ciphers set to %s: %s; fallback to %s", + cfg->ssl_ciphers, + ERR_error_string (ERR_get_error (), NULL), + secure_ciphers); + /* Default settings */ + SSL_CTX_set_cipher_list (ctx->ssl_ctx, secure_ciphers); + } + } + + /* Init decompression */ + ctx->in_zstream = ZSTD_createDStream (); + r = ZSTD_initDStream (ctx->in_zstream); + + if (ZSTD_isError (r)) { + msg_err ("cannot init decompression stream: %s", + ZSTD_getErrorName (r)); + ZSTD_freeDStream (ctx->in_zstream); + ctx->in_zstream = NULL; + } + + /* Init compression */ + ctx->out_zstream = ZSTD_createCStream (); + r = ZSTD_initCStream (ctx->out_zstream, 1); + + if (ZSTD_isError (r)) { 
+ msg_err ("cannot init compression stream: %s", + ZSTD_getErrorName (r)); + ZSTD_freeCStream (ctx->out_zstream); + ctx->out_zstream = NULL; + } +#ifdef HAVE_CBLAS + openblas_set_num_threads (cfg->max_blas_threads); +#endif + } + + return ret; +} + +gboolean +rspamd_libs_reset_decompression (struct rspamd_external_libs_ctx *ctx) +{ + gsize r; + + if (ctx->in_zstream == NULL) { + return FALSE; + } + else { + r = ZSTD_resetDStream (ctx->in_zstream); + + if (ZSTD_isError (r)) { + msg_err ("cannot init decompression stream: %s", + ZSTD_getErrorName (r)); + ZSTD_freeDStream (ctx->in_zstream); + ctx->in_zstream = NULL; + + return FALSE; + } + } + + return TRUE; +} + +gboolean +rspamd_libs_reset_compression (struct rspamd_external_libs_ctx *ctx) +{ + gsize r; + + if (ctx->out_zstream == NULL) { + return FALSE; + } + else { + /* Dictionary will be reused automatically if specified */ + r = ZSTD_resetCStream (ctx->out_zstream, 0); + + if (ZSTD_isError (r)) { + msg_err ("cannot init compression stream: %s", + ZSTD_getErrorName (r)); + ZSTD_freeCStream (ctx->out_zstream); + ctx->out_zstream = NULL; + + return FALSE; + } + } + + return TRUE; +} + +void +rspamd_deinit_libs (struct rspamd_external_libs_ctx *ctx) +{ + if (ctx != NULL) { + g_free (ctx->ottery_cfg); + +#ifdef HAVE_OPENSSL + EVP_cleanup (); + ERR_free_strings (); + SSL_CTX_free (ctx->ssl_ctx); + SSL_CTX_free (ctx->ssl_ctx_noverify); +#endif + rspamd_inet_library_destroy (); + rspamd_free_zstd_dictionary (ctx->in_dict); + rspamd_free_zstd_dictionary (ctx->out_dict); + + if (ctx->out_zstream) { + ZSTD_freeCStream (ctx->out_zstream); + } + + if (ctx->in_zstream) { + ZSTD_freeDStream (ctx->in_zstream); + } + + rspamd_cryptobox_deinit (ctx->crypto_ctx); + + g_free (ctx); + } }
\ No newline at end of file diff --git a/src/libserver/dynamic_cfg.c b/src/libserver/dynamic_cfg.c index a39778ec2..8b1f464f1 100644 --- a/src/libserver/dynamic_cfg.c +++ b/src/libserver/dynamic_cfg.c @@ -15,7 +15,7 @@ */ #include "config.h" #include "rspamd.h" -#include "map.h" +#include "libserver/maps/map.h" #include "scan_result.h" #include "dynamic_cfg.h" #include "unix-std.h" diff --git a/src/libserver/http/http_connection.c b/src/libserver/http/http_connection.c new file mode 100644 index 000000000..28a13f7ba --- /dev/null +++ b/src/libserver/http/http_connection.c @@ -0,0 +1,2534 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "config.h" +#include "http_connection.h" +#include "http_private.h" +#include "http_message.h" +#include "utlist.h" +#include "util.h" +#include "printf.h" +#include "logger.h" +#include "ref.h" +#include "ottery.h" +#include "keypair_private.h" +#include "cryptobox.h" +#include "libutil/libev_helper.h" +#include "libserver/ssl_util.h" +#include "libserver/url.h" + +#include "contrib/mumhash/mum.h" +#include "contrib/http-parser/http_parser.h" +#include "unix-std.h" + +#include <openssl/err.h> + +#define ENCRYPTED_VERSION " HTTP/1.0" + +struct _rspamd_http_privbuf { + rspamd_fstring_t *data; + const gchar *zc_buf; + gsize zc_remain; + ref_entry_t ref; +}; + +enum rspamd_http_priv_flags { + RSPAMD_HTTP_CONN_FLAG_ENCRYPTED = 1u << 0u, + RSPAMD_HTTP_CONN_FLAG_NEW_HEADER = 1u << 1u, + RSPAMD_HTTP_CONN_FLAG_RESETED = 1u << 2u, + RSPAMD_HTTP_CONN_FLAG_TOO_LARGE = 1u << 3u, + RSPAMD_HTTP_CONN_FLAG_ENCRYPTION_NEEDED = 1u << 4u, + RSPAMD_HTTP_CONN_FLAG_PROXY = 1u << 5u, + RSPAMD_HTTP_CONN_FLAG_PROXY_REQUEST = 1u << 6u, + RSPAMD_HTTP_CONN_OWN_SOCKET = 1u << 7u, +}; + +#define IS_CONN_ENCRYPTED(c) ((c)->flags & RSPAMD_HTTP_CONN_FLAG_ENCRYPTED) +#define IS_CONN_RESETED(c) ((c)->flags & RSPAMD_HTTP_CONN_FLAG_RESETED) + +struct rspamd_http_connection_private { + struct rspamd_http_context *ctx; + struct rspamd_ssl_connection *ssl; + struct _rspamd_http_privbuf *buf; + struct rspamd_keypair_cache *cache; + struct rspamd_cryptobox_pubkey *peer_key; + struct rspamd_cryptobox_keypair *local_key; + struct rspamd_http_header *header; + struct http_parser parser; + struct http_parser_settings parser_cb; + struct rspamd_io_ev ev; + ev_tstamp timeout; + struct rspamd_http_message *msg; + struct iovec *out; + guint outlen; + enum rspamd_http_priv_flags flags; + gsize wr_pos; + gsize wr_total; +}; + +static const rspamd_ftok_t key_header = { + .begin = "Key", + .len = 3 +}; +static const rspamd_ftok_t date_header = { + .begin = "Date", + .len = 4 +}; +static const 
rspamd_ftok_t last_modified_header = { + .begin = "Last-Modified", + .len = 13 +}; + + + +#define HTTP_ERROR http_error_quark () +GQuark +http_error_quark (void) +{ + return g_quark_from_static_string ("http-error-quark"); +} + +static void +rspamd_http_privbuf_dtor (gpointer ud) +{ + struct _rspamd_http_privbuf *p = (struct _rspamd_http_privbuf *)ud; + + if (p->data) { + rspamd_fstring_free (p->data); + } + + g_free (p); +} + +static const gchar * +rspamd_http_code_to_str (gint code) +{ + if (code == 200) { + return "OK"; + } + else if (code == 404) { + return "Not found"; + } + else if (code == 403 || code == 401) { + return "Not authorized"; + } + else if (code >= 400 && code < 500) { + return "Bad request"; + } + else if (code >= 300 && code < 400) { + return "See Other"; + } + else if (code >= 500 && code < 600) { + return "Internal server error"; + } + + return "Unknown error"; +} + +static void +rspamd_http_parse_key (rspamd_ftok_t *data, struct rspamd_http_connection *conn, + struct rspamd_http_connection_private *priv) +{ + guchar *decoded_id; + const gchar *eq_pos; + gsize id_len; + struct rspamd_cryptobox_pubkey *pk; + + if (priv->local_key == NULL) { + /* In this case we cannot do anything, e.g. 
we cannot decrypt payload */ + priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_ENCRYPTED; + } + else { + /* Check sanity of what we have */ + eq_pos = memchr (data->begin, '=', data->len); + if (eq_pos != NULL) { + decoded_id = rspamd_decode_base32 (data->begin, eq_pos - data->begin, + &id_len); + + if (decoded_id != NULL && id_len >= RSPAMD_KEYPAIR_SHORT_ID_LEN) { + pk = rspamd_pubkey_from_base32 (eq_pos + 1, + data->begin + data->len - eq_pos - 1, + RSPAMD_KEYPAIR_KEX, + RSPAMD_CRYPTOBOX_MODE_25519); + if (pk != NULL) { + if (memcmp (rspamd_keypair_get_id (priv->local_key), + decoded_id, + RSPAMD_KEYPAIR_SHORT_ID_LEN) == 0) { + priv->msg->peer_key = pk; + + if (priv->cache && priv->msg->peer_key) { + rspamd_keypair_cache_process (priv->cache, + priv->local_key, + priv->msg->peer_key); + } + } + else { + rspamd_pubkey_unref (pk); + } + } + } + + priv->flags |= RSPAMD_HTTP_CONN_FLAG_ENCRYPTED; + g_free (decoded_id); + } + } +} + +static inline void +rspamd_http_check_special_header (struct rspamd_http_connection *conn, + struct rspamd_http_connection_private *priv) +{ + if (rspamd_ftok_casecmp (&priv->header->name, &date_header) == 0) { + priv->msg->date = rspamd_http_parse_date (priv->header->value.begin, + priv->header->value.len); + } + else if (rspamd_ftok_casecmp (&priv->header->name, &key_header) == 0) { + rspamd_http_parse_key (&priv->header->value, conn, priv); + } + else if (rspamd_ftok_casecmp (&priv->header->name, &last_modified_header) == 0) { + priv->msg->last_modified = rspamd_http_parse_date ( + priv->header->value.begin, + priv->header->value.len); + } +} + +static gint +rspamd_http_on_url (http_parser * parser, const gchar *at, size_t length) +{ + struct rspamd_http_connection *conn = + (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + priv->msg->url = rspamd_fstring_append (priv->msg->url, at, length); + + return 0; +} + +static gint +rspamd_http_on_status (http_parser * parser, const 
gchar *at, size_t length) +{ + struct rspamd_http_connection *conn = + (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + if (parser->status_code != 200) { + if (priv->msg->status == NULL) { + priv->msg->status = rspamd_fstring_new (); + } + + priv->msg->status = rspamd_fstring_append (priv->msg->status, at, length); + } + + return 0; +} + +static void +rspamd_http_finish_header (struct rspamd_http_connection *conn, + struct rspamd_http_connection_private *priv) +{ + struct rspamd_http_header *hdr; + khiter_t k; + gint r; + + priv->header->combined = rspamd_fstring_append (priv->header->combined, + "\r\n", 2); + priv->header->value.len = priv->header->combined->len - + priv->header->name.len - 4; + priv->header->value.begin = priv->header->combined->str + + priv->header->name.len + 2; + priv->header->name.begin = priv->header->combined->str; + + k = kh_put (rspamd_http_headers_hash, priv->msg->headers, &priv->header->name, + &r); + + if (r != 0) { + kh_value (priv->msg->headers, k) = priv->header; + hdr = NULL; + } + else { + hdr = kh_value (priv->msg->headers, k); + } + + DL_APPEND (hdr, priv->header); + + rspamd_http_check_special_header (conn, priv); +} + +static void +rspamd_http_init_header (struct rspamd_http_connection_private *priv) +{ + priv->header = g_malloc0 (sizeof (struct rspamd_http_header)); + priv->header->combined = rspamd_fstring_new (); +} + +static gint +rspamd_http_on_header_field (http_parser * parser, + const gchar *at, + size_t length) +{ + struct rspamd_http_connection *conn = + (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + if (priv->header == NULL) { + rspamd_http_init_header (priv); + } + else if (priv->flags & RSPAMD_HTTP_CONN_FLAG_NEW_HEADER) { + rspamd_http_finish_header (conn, priv); + rspamd_http_init_header (priv); + } + + priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_NEW_HEADER; + 
priv->header->combined = rspamd_fstring_append (priv->header->combined, + at, length); + + return 0; +} + +static gint +rspamd_http_on_header_value (http_parser * parser, + const gchar *at, + size_t length) +{ + struct rspamd_http_connection *conn = + (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + if (priv->header == NULL) { + /* Should not happen */ + return -1; + } + + if (!(priv->flags & RSPAMD_HTTP_CONN_FLAG_NEW_HEADER)) { + priv->flags |= RSPAMD_HTTP_CONN_FLAG_NEW_HEADER; + priv->header->combined = rspamd_fstring_append (priv->header->combined, + ": ", 2); + priv->header->name.len = priv->header->combined->len - 2; + } + + priv->header->combined = rspamd_fstring_append (priv->header->combined, + at, length); + + return 0; +} + +static int +rspamd_http_on_headers_complete (http_parser * parser) +{ + struct rspamd_http_connection *conn = + (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + struct rspamd_http_message *msg; + int ret; + + priv = conn->priv; + msg = priv->msg; + + if (priv->header != NULL) { + rspamd_http_finish_header (conn, priv); + + priv->header = NULL; + priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_NEW_HEADER; + } + + if (msg->method == HTTP_HEAD) { + /* We don't care about the rest */ + rspamd_ev_watcher_stop (priv->ctx->event_loop, &priv->ev); + + msg->code = parser->status_code; + rspamd_http_connection_ref (conn); + ret = conn->finish_handler (conn, msg); + + if (conn->opts & RSPAMD_HTTP_CLIENT_KEEP_ALIVE) { + rspamd_http_context_push_keepalive (conn->priv->ctx, conn, + msg, conn->priv->ctx->event_loop); + rspamd_http_connection_reset (conn); + } + else { + conn->finished = TRUE; + } + + rspamd_http_connection_unref (conn); + + return ret; + } + + /* + * HTTP parser sets content length to (-1) when it doesn't know the real + * length, for example, in case of chunked encoding. 
+ * + * Hence, we skip body setup here + */ + if (parser->content_length != ULLONG_MAX && parser->content_length != 0 && + msg->method != HTTP_HEAD) { + if (conn->max_size > 0 && + parser->content_length > conn->max_size) { + /* Too large message */ + priv->flags |= RSPAMD_HTTP_CONN_FLAG_TOO_LARGE; + return -1; + } + + if (!rspamd_http_message_set_body (msg, NULL, parser->content_length)) { + return -1; + } + } + + if (parser->flags & F_SPAMC) { + msg->flags |= RSPAMD_HTTP_FLAG_SPAMC; + } + + + msg->method = parser->method; + msg->code = parser->status_code; + + return 0; +} + +static void +rspamd_http_switch_zc (struct _rspamd_http_privbuf *pbuf, + struct rspamd_http_message *msg) +{ + pbuf->zc_buf = msg->body_buf.begin + msg->body_buf.len; + pbuf->zc_remain = msg->body_buf.allocated_len - msg->body_buf.len; +} + +static int +rspamd_http_on_body (http_parser * parser, const gchar *at, size_t length) +{ + struct rspamd_http_connection *conn = + (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + struct rspamd_http_message *msg; + struct _rspamd_http_privbuf *pbuf; + const gchar *p; + + priv = conn->priv; + msg = priv->msg; + pbuf = priv->buf; + p = at; + + if (!(msg->flags & RSPAMD_HTTP_FLAG_HAS_BODY)) { + if (!rspamd_http_message_set_body (msg, NULL, parser->content_length)) { + return -1; + } + } + + if (conn->finished) { + return 0; + } + + if (conn->max_size > 0 && + msg->body_buf.len + length > conn->max_size) { + /* Body length overflow */ + priv->flags |= RSPAMD_HTTP_CONN_FLAG_TOO_LARGE; + return -1; + } + + if (!pbuf->zc_buf) { + if (!rspamd_http_message_append_body (msg, at, length)) { + return -1; + } + + /* We might have some leftover in our private buffer */ + if (pbuf->data->len == length) { + /* Switch to zero-copy mode */ + rspamd_http_switch_zc (pbuf, msg); + } + } + else { + if (msg->body_buf.begin + msg->body_buf.len != at) { + /* Likely chunked encoding */ + memmove ((gchar *)msg->body_buf.begin + 
msg->body_buf.len, at, length); + p = msg->body_buf.begin + msg->body_buf.len; + } + + /* Adjust zero-copy buf */ + msg->body_buf.len += length; + + if (!(msg->flags & RSPAMD_HTTP_FLAG_SHMEM)) { + msg->body_buf.c.normal->len += length; + } + + pbuf->zc_buf = msg->body_buf.begin + msg->body_buf.len; + pbuf->zc_remain = msg->body_buf.allocated_len - msg->body_buf.len; + } + + if ((conn->opts & RSPAMD_HTTP_BODY_PARTIAL) && !IS_CONN_ENCRYPTED (priv)) { + /* Incremental update is impossible for encrypted requests so far */ + return (conn->body_handler (conn, msg, p, length)); + } + + return 0; +} + +static int +rspamd_http_on_body_decrypted (http_parser * parser, const gchar *at, size_t length) +{ + struct rspamd_http_connection *conn = + (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + if (priv->header != NULL) { + rspamd_http_finish_header (conn, priv); + priv->header = NULL; + } + + if (conn->finished) { + return 0; + } + + if (priv->msg->body_buf.len == 0) { + + priv->msg->body_buf.begin = at; + priv->msg->method = parser->method; + priv->msg->code = parser->status_code; + } + + priv->msg->body_buf.len += length; + + return 0; +} + +static int +rspamd_http_on_headers_complete_decrypted (http_parser *parser) +{ + struct rspamd_http_connection *conn = + (struct rspamd_http_connection *) parser->data; + struct rspamd_http_connection_private *priv; + struct rspamd_http_message *msg; + int ret; + + priv = conn->priv; + msg = priv->msg; + + if (priv->header != NULL) { + rspamd_http_finish_header (conn, priv); + + priv->header = NULL; + priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_NEW_HEADER; + } + + if (parser->flags & F_SPAMC) { + priv->msg->flags |= RSPAMD_HTTP_FLAG_SPAMC; + } + + if (msg->method == HTTP_HEAD) { + /* We don't care about the rest */ + rspamd_ev_watcher_stop (priv->ctx->event_loop, &priv->ev); + msg->code = parser->status_code; + rspamd_http_connection_ref (conn); + ret = 
conn->finish_handler (conn, msg); + + if (conn->opts & RSPAMD_HTTP_CLIENT_KEEP_ALIVE) { + rspamd_http_context_push_keepalive (conn->priv->ctx, conn, + msg, conn->priv->ctx->event_loop); + rspamd_http_connection_reset (conn); + } + else { + conn->finished = TRUE; + } + + rspamd_http_connection_unref (conn); + + return ret; + } + + priv->msg->method = parser->method; + priv->msg->code = parser->status_code; + + return 0; +} + +static int +rspamd_http_decrypt_message (struct rspamd_http_connection *conn, + struct rspamd_http_connection_private *priv, + struct rspamd_cryptobox_pubkey *peer_key) +{ + guchar *nonce, *m; + const guchar *nm; + gsize dec_len; + struct rspamd_http_message *msg = priv->msg; + struct rspamd_http_header *hdr, *hcur, *hcurtmp; + struct http_parser decrypted_parser; + struct http_parser_settings decrypted_cb; + enum rspamd_cryptobox_mode mode; + + mode = rspamd_keypair_alg (priv->local_key); + nonce = msg->body_buf.str; + m = msg->body_buf.str + rspamd_cryptobox_nonce_bytes (mode) + + rspamd_cryptobox_mac_bytes (mode); + dec_len = msg->body_buf.len - rspamd_cryptobox_nonce_bytes (mode) - + rspamd_cryptobox_mac_bytes (mode); + + if ((nm = rspamd_pubkey_get_nm (peer_key, priv->local_key)) == NULL) { + nm = rspamd_pubkey_calculate_nm (peer_key, priv->local_key); + } + + if (!rspamd_cryptobox_decrypt_nm_inplace (m, dec_len, nonce, + nm, m - rspamd_cryptobox_mac_bytes (mode), mode)) { + msg_err ("cannot verify encrypted message, first bytes of the input: %*xs", + (gint)MIN(msg->body_buf.len, 64), msg->body_buf.begin); + return -1; + } + + /* Cleanup message */ + kh_foreach_value (msg->headers, hdr, { + DL_FOREACH_SAFE (hdr, hcur, hcurtmp) { + rspamd_fstring_free (hcur->combined); + g_free (hcur); + } + }); + + kh_destroy (rspamd_http_headers_hash, msg->headers); + msg->headers = kh_init (rspamd_http_headers_hash); + + if (msg->url != NULL) { + msg->url = rspamd_fstring_assign (msg->url, "", 0); + } + + msg->body_buf.len = 0; + + memset 
(&decrypted_parser, 0, sizeof (decrypted_parser)); + http_parser_init (&decrypted_parser, + conn->type == RSPAMD_HTTP_SERVER ? HTTP_REQUEST : HTTP_RESPONSE); + + memset (&decrypted_cb, 0, sizeof (decrypted_cb)); + decrypted_cb.on_url = rspamd_http_on_url; + decrypted_cb.on_status = rspamd_http_on_status; + decrypted_cb.on_header_field = rspamd_http_on_header_field; + decrypted_cb.on_header_value = rspamd_http_on_header_value; + decrypted_cb.on_headers_complete = rspamd_http_on_headers_complete_decrypted; + decrypted_cb.on_body = rspamd_http_on_body_decrypted; + decrypted_parser.data = conn; + decrypted_parser.content_length = dec_len; + + if (http_parser_execute (&decrypted_parser, &decrypted_cb, m, + dec_len) != (size_t)dec_len) { + msg_err ("HTTP parser error: %s when parsing encrypted request", + http_errno_description (decrypted_parser.http_errno)); + return -1; + } + + return 0; +} + +static int +rspamd_http_on_message_complete (http_parser * parser) +{ + struct rspamd_http_connection *conn = + (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + int ret = 0; + enum rspamd_cryptobox_mode mode; + + if (conn->finished) { + return 0; + } + + priv = conn->priv; + + if ((conn->opts & RSPAMD_HTTP_REQUIRE_ENCRYPTION) && !IS_CONN_ENCRYPTED (priv)) { + priv->flags |= RSPAMD_HTTP_CONN_FLAG_ENCRYPTION_NEEDED; + msg_err ("unencrypted connection when encryption has been requested"); + return -1; + } + + if ((conn->opts & RSPAMD_HTTP_BODY_PARTIAL) == 0 && IS_CONN_ENCRYPTED (priv)) { + mode = rspamd_keypair_alg (priv->local_key); + + if (priv->local_key == NULL || priv->msg->peer_key == NULL || + priv->msg->body_buf.len < rspamd_cryptobox_nonce_bytes (mode) + + rspamd_cryptobox_mac_bytes (mode)) { + msg_err ("cannot decrypt message"); + return -1; + } + + /* We have keys, so we can decrypt message */ + ret = rspamd_http_decrypt_message (conn, priv, priv->msg->peer_key); + + if (ret != 0) { + return ret; + } + + if 
(conn->body_handler != NULL) { + rspamd_http_connection_ref (conn); + ret = conn->body_handler (conn, + priv->msg, + priv->msg->body_buf.begin, + priv->msg->body_buf.len); + rspamd_http_connection_unref (conn); + } + } + else if ((conn->opts & RSPAMD_HTTP_BODY_PARTIAL) == 0 && conn->body_handler) { + g_assert (conn->body_handler != NULL); + rspamd_http_connection_ref (conn); + ret = conn->body_handler (conn, + priv->msg, + priv->msg->body_buf.begin, + priv->msg->body_buf.len); + rspamd_http_connection_unref (conn); + } + + if (ret == 0) { + rspamd_ev_watcher_stop (priv->ctx->event_loop, &priv->ev); + rspamd_http_connection_ref (conn); + ret = conn->finish_handler (conn, priv->msg); + + if (conn->opts & RSPAMD_HTTP_CLIENT_KEEP_ALIVE) { + rspamd_http_context_push_keepalive (conn->priv->ctx, conn, + priv->msg, conn->priv->ctx->event_loop); + rspamd_http_connection_reset (conn); + } + else { + conn->finished = TRUE; + } + + rspamd_http_connection_unref (conn); + } + + return ret; +} + +static void +rspamd_http_simple_client_helper (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv; + gpointer ssl; + gint request_method; + GString *prev_host = NULL; + + priv = conn->priv; + ssl = priv->ssl; + priv->ssl = NULL; + + /* Preserve data */ + if (priv->msg) { + request_method = priv->msg->method; + /* Preserve host for keepalive */ + prev_host = priv->msg->host; + priv->msg->host = NULL; + } + + rspamd_http_connection_reset (conn); + priv->ssl = ssl; + + /* Plan read message */ + + if (conn->opts & RSPAMD_HTTP_CLIENT_SHARED) { + rspamd_http_connection_read_message_shared (conn, conn->ud, + conn->priv->timeout); + } + else { + rspamd_http_connection_read_message (conn, conn->ud, + conn->priv->timeout); + } + + if (priv->msg) { + priv->msg->method = request_method; + priv->msg->host = prev_host; + } + else { + if (prev_host) { + g_string_free (prev_host, TRUE); + } + } +} + +static void +rspamd_http_write_helper (struct rspamd_http_connection 
*conn) +{ + struct rspamd_http_connection_private *priv; + struct iovec *start; + guint niov, i; + gint flags = 0; + gsize remain; + gssize r; + GError *err; + struct iovec *cur_iov; + struct msghdr msg; + + priv = conn->priv; + + if (priv->wr_pos == priv->wr_total) { + goto call_finish_handler; + } + + start = &priv->out[0]; + niov = priv->outlen; + remain = priv->wr_pos; + /* We know that niov is small enough for that */ + if (priv->ssl) { + /* Might be recursive! */ + cur_iov = g_malloc (niov * sizeof (struct iovec)); + } + else { + cur_iov = alloca (niov * sizeof (struct iovec)); + } + memcpy (cur_iov, priv->out, niov * sizeof (struct iovec)); + for (i = 0; i < priv->outlen && remain > 0; i++) { + /* Find out the first iov required */ + start = &cur_iov[i]; + if (start->iov_len <= remain) { + remain -= start->iov_len; + start = &cur_iov[i + 1]; + niov--; + } + else { + start->iov_base = (void *)((char *)start->iov_base + remain); + start->iov_len -= remain; + remain = 0; + } + } + + memset (&msg, 0, sizeof (msg)); + msg.msg_iov = start; + msg.msg_iovlen = MIN (IOV_MAX, niov); + g_assert (niov > 0); +#ifdef MSG_NOSIGNAL + flags = MSG_NOSIGNAL; +#endif + + if (priv->ssl) { + r = rspamd_ssl_writev (priv->ssl, msg.msg_iov, msg.msg_iovlen); + g_free (cur_iov); + } + else { + r = sendmsg (conn->fd, &msg, flags); + } + + if (r == -1) { + if (!priv->ssl) { + err = g_error_new (HTTP_ERROR, errno, "IO write error: %s", strerror (errno)); + rspamd_http_connection_ref (conn); + conn->error_handler (conn, err); + rspamd_http_connection_unref (conn); + g_error_free (err); + } + + return; + } + else { + priv->wr_pos += r; + } + + if (priv->wr_pos >= priv->wr_total) { + goto call_finish_handler; + } + else { + /* Want to write more */ + priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_RESETED; + + if (priv->ssl && r > 0) { + /* We can write more data... 
*/ + rspamd_http_write_helper (conn); + return; + } + } + + return; + +call_finish_handler: + rspamd_ev_watcher_stop (priv->ctx->event_loop, &priv->ev); + + if ((conn->opts & RSPAMD_HTTP_CLIENT_SIMPLE) == 0) { + rspamd_http_connection_ref (conn); + conn->finished = TRUE; + conn->finish_handler (conn, priv->msg); + rspamd_http_connection_unref (conn); + } + else { + /* Plan read message */ + rspamd_http_simple_client_helper (conn); + } +} + +static gssize +rspamd_http_try_read (gint fd, + struct rspamd_http_connection *conn, + struct rspamd_http_connection_private *priv, + struct _rspamd_http_privbuf *pbuf, + const gchar **buf_ptr) +{ + gssize r; + gchar *data; + gsize len; + struct rspamd_http_message *msg; + + msg = priv->msg; + + if (pbuf->zc_buf == NULL) { + data = priv->buf->data->str; + len = priv->buf->data->allocated; + } + else { + data = (gchar *)pbuf->zc_buf; + len = pbuf->zc_remain; + + if (len == 0) { + rspamd_http_message_grow_body (priv->msg, priv->buf->data->allocated); + rspamd_http_switch_zc (pbuf, msg); + data = (gchar *)pbuf->zc_buf; + len = pbuf->zc_remain; + } + } + + if (priv->ssl) { + r = rspamd_ssl_read (priv->ssl, data, len); + } + else { + r = read (fd, data, len); + } + + if (r <= 0) { + return r; + } + else { + if (pbuf->zc_buf == NULL) { + priv->buf->data->len = r; + } + else { + pbuf->zc_remain -= r; + pbuf->zc_buf += r; + } + } + + if (buf_ptr) { + *buf_ptr = data; + } + + return r; +} + +static void +rspamd_http_ssl_err_handler (gpointer ud, GError *err) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)ud; + + rspamd_http_connection_ref (conn); + conn->error_handler (conn, err); + rspamd_http_connection_unref (conn); +} + +static void +rspamd_http_event_handler (int fd, short what, gpointer ud) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)ud; + struct rspamd_http_connection_private *priv; + struct _rspamd_http_privbuf *pbuf; + const gchar *d; + gssize r; + GError *err; + + 
priv = conn->priv; + pbuf = priv->buf; + REF_RETAIN (pbuf); + rspamd_http_connection_ref (conn); + + if (what == EV_READ) { + r = rspamd_http_try_read (fd, conn, priv, pbuf, &d); + + if (r > 0) { + if (http_parser_execute (&priv->parser, &priv->parser_cb, + d, r) != (size_t)r || priv->parser.http_errno != 0) { + if (priv->flags & RSPAMD_HTTP_CONN_FLAG_TOO_LARGE) { + err = g_error_new (HTTP_ERROR, 413, + "Request entity too large: %zu", + (size_t)priv->parser.content_length); + } + else if (priv->flags & RSPAMD_HTTP_CONN_FLAG_ENCRYPTION_NEEDED) { + err = g_error_new (HTTP_ERROR, 400, + "Encryption required"); + } + else if (priv->parser.http_errno == HPE_CLOSED_CONNECTION) { + msg_err ("got garbage after end of the message, ignore it"); + + REF_RELEASE (pbuf); + rspamd_http_connection_unref (conn); + + return; + } + else { + err = g_error_new (HTTP_ERROR, 500 + priv->parser.http_errno, + "HTTP parser error: %s", + http_errno_description (priv->parser.http_errno)); + } + + if (!conn->finished) { + conn->error_handler (conn, err); + } + else { + msg_err ("got error after HTTP request is finished: %e", err); + } + + g_error_free (err); + + REF_RELEASE (pbuf); + rspamd_http_connection_unref (conn); + + return; + } + } + else if (r == 0) { + /* We can still call http parser */ + http_parser_execute (&priv->parser, &priv->parser_cb, d, r); + + if (!conn->finished) { + err = g_error_new (HTTP_ERROR, + errno, + "IO read error: unexpected EOF"); + conn->error_handler (conn, err); + g_error_free (err); + } + REF_RELEASE (pbuf); + rspamd_http_connection_unref (conn); + + return; + } + else { + if (!priv->ssl) { + err = g_error_new (HTTP_ERROR, + errno, + "IO read error: %s", + strerror (errno)); + conn->error_handler (conn, err); + g_error_free (err); + } + + REF_RELEASE (pbuf); + rspamd_http_connection_unref (conn); + + return; + } + } + else if (what == EV_TIMEOUT) { + /* Let's try to read from the socket first */ + r = rspamd_http_try_read (fd, conn, priv, pbuf, &d); + + if 
(r > 0) { + if (http_parser_execute (&priv->parser, &priv->parser_cb, + d, r) != (size_t)r || priv->parser.http_errno != 0) { + err = g_error_new (HTTP_ERROR, priv->parser.http_errno, + "HTTP parser error: %s", + http_errno_description (priv->parser.http_errno)); + + if (!conn->finished) { + conn->error_handler (conn, err); + } + else { + msg_err ("got error after HTTP request is finished: %e", err); + } + + g_error_free (err); + + REF_RELEASE (pbuf); + rspamd_http_connection_unref (conn); + + return; + } + } + else if (r == 0) { + if (!conn->finished) { + err = g_error_new (HTTP_ERROR, ETIMEDOUT, + "IO timeout"); + conn->error_handler (conn, err); + g_error_free (err); + + } + REF_RELEASE (pbuf); + rspamd_http_connection_unref (conn); + + return; + } + else { + err = g_error_new (HTTP_ERROR, ETIMEDOUT, + "IO timeout"); + conn->error_handler (conn, err); + g_error_free (err); + + REF_RELEASE (pbuf); + rspamd_http_connection_unref (conn); + + return; + } + } + else if (what == EV_WRITE) { + rspamd_http_write_helper (conn); + } + + REF_RELEASE (pbuf); + rspamd_http_connection_unref (conn); +} + +static void +rspamd_http_parser_reset (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv = conn->priv; + + http_parser_init (&priv->parser, + conn->type == RSPAMD_HTTP_SERVER ? 
HTTP_REQUEST : HTTP_RESPONSE); + + priv->parser_cb.on_url = rspamd_http_on_url; + priv->parser_cb.on_status = rspamd_http_on_status; + priv->parser_cb.on_header_field = rspamd_http_on_header_field; + priv->parser_cb.on_header_value = rspamd_http_on_header_value; + priv->parser_cb.on_headers_complete = rspamd_http_on_headers_complete; + priv->parser_cb.on_body = rspamd_http_on_body; + priv->parser_cb.on_message_complete = rspamd_http_on_message_complete; +} + +static struct rspamd_http_connection * +rspamd_http_connection_new_common (struct rspamd_http_context *ctx, + gint fd, + rspamd_http_body_handler_t body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + unsigned opts, + enum rspamd_http_connection_type type, + enum rspamd_http_priv_flags priv_flags, + struct upstream *proxy_upstream) +{ + struct rspamd_http_connection *conn; + struct rspamd_http_connection_private *priv; + + g_assert (error_handler != NULL && finish_handler != NULL); + + if (ctx == NULL) { + ctx = rspamd_http_context_default (); + } + + conn = g_malloc0 (sizeof (struct rspamd_http_connection)); + conn->opts = opts; + conn->type = type; + conn->body_handler = body_handler; + conn->error_handler = error_handler; + conn->finish_handler = finish_handler; + conn->fd = fd; + conn->ref = 1; + conn->finished = FALSE; + + /* Init priv */ + priv = g_malloc0 (sizeof (struct rspamd_http_connection_private)); + conn->priv = priv; + priv->ctx = ctx; + priv->flags = priv_flags; + + if (type == RSPAMD_HTTP_SERVER) { + priv->cache = ctx->server_kp_cache; + } + else { + priv->cache = ctx->client_kp_cache; + if (ctx->client_kp) { + priv->local_key = rspamd_keypair_ref (ctx->client_kp); + } + } + + rspamd_http_parser_reset (conn); + priv->parser.data = conn; + + return conn; +} + +struct rspamd_http_connection * +rspamd_http_connection_new_server (struct rspamd_http_context *ctx, + gint fd, + rspamd_http_body_handler_t body_handler, + 
rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + unsigned opts) +{ + return rspamd_http_connection_new_common (ctx, fd, body_handler, + error_handler, finish_handler, opts, RSPAMD_HTTP_SERVER, 0, NULL); +} + +struct rspamd_http_connection * +rspamd_http_connection_new_client_socket (struct rspamd_http_context *ctx, + rspamd_http_body_handler_t body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + unsigned opts, + gint fd) +{ + return rspamd_http_connection_new_common (ctx, fd, body_handler, + error_handler, finish_handler, opts, RSPAMD_HTTP_CLIENT, 0, NULL); +} + +struct rspamd_http_connection * +rspamd_http_connection_new_client (struct rspamd_http_context *ctx, + rspamd_http_body_handler_t body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + unsigned opts, + rspamd_inet_addr_t *addr) +{ + gint fd; + + if (ctx == NULL) { + ctx = rspamd_http_context_default (); + } + + if (ctx->http_proxies) { + struct upstream *up = rspamd_upstream_get (ctx->http_proxies, + RSPAMD_UPSTREAM_ROUND_ROBIN, NULL, 0); + + if (up) { + rspamd_inet_addr_t *proxy_addr = rspamd_upstream_addr_next (up); + + fd = rspamd_inet_address_connect (proxy_addr, SOCK_STREAM, TRUE); + + if (fd == -1) { + msg_info ("cannot connect to http proxy %s: %s", + rspamd_inet_address_to_string_pretty (proxy_addr), + strerror (errno)); + rspamd_upstream_fail (up, TRUE, strerror (errno)); + + return NULL; + } + + return rspamd_http_connection_new_common (ctx, fd, body_handler, + error_handler, finish_handler, opts, + RSPAMD_HTTP_CLIENT, + RSPAMD_HTTP_CONN_OWN_SOCKET|RSPAMD_HTTP_CONN_FLAG_PROXY, + up); + } + } + + /* Unproxied version */ + fd = rspamd_inet_address_connect (addr, SOCK_STREAM, TRUE); + + if (fd == -1) { + msg_info ("cannot connect make http connection to %s: %s", + rspamd_inet_address_to_string_pretty (addr), + strerror (errno)); + + return NULL; + } + 
+ return rspamd_http_connection_new_common (ctx, fd, body_handler, + error_handler, finish_handler, opts, + RSPAMD_HTTP_CLIENT, + RSPAMD_HTTP_CONN_OWN_SOCKET, + NULL); +} + +struct rspamd_http_connection * +rspamd_http_connection_new_keepalive (struct rspamd_http_context *ctx, + rspamd_http_body_handler_t body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + rspamd_inet_addr_t *addr, + const gchar *host) +{ + struct rspamd_http_connection *conn; + + if (ctx == NULL) { + ctx = rspamd_http_context_default (); + } + + conn = rspamd_http_context_check_keepalive (ctx, addr, host); + + if (conn) { + return conn; + } + + conn = rspamd_http_connection_new_client (ctx, + body_handler, error_handler, finish_handler, + RSPAMD_HTTP_CLIENT_SIMPLE|RSPAMD_HTTP_CLIENT_KEEP_ALIVE, + addr); + + if (conn) { + rspamd_http_context_prepare_keepalive (ctx, conn, addr, host); + } + + return conn; +} + +void +rspamd_http_connection_reset (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv; + struct rspamd_http_message *msg; + + priv = conn->priv; + msg = priv->msg; + + /* Clear request */ + if (msg != NULL) { + if (msg->peer_key) { + priv->peer_key = msg->peer_key; + msg->peer_key = NULL; + } + rspamd_http_message_unref (msg); + priv->msg = NULL; + } + + conn->finished = FALSE; + /* Clear priv */ + rspamd_ev_watcher_stop (priv->ctx->event_loop, &priv->ev); + + if (!(priv->flags & RSPAMD_HTTP_CONN_FLAG_RESETED)) { + rspamd_http_parser_reset (conn); + } + + if (priv->buf != NULL) { + REF_RELEASE (priv->buf); + priv->buf = NULL; + } + + if (priv->out != NULL) { + g_free (priv->out); + priv->out = NULL; + } + + priv->flags |= RSPAMD_HTTP_CONN_FLAG_RESETED; +} + +struct rspamd_http_message * +rspamd_http_connection_steal_msg (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv; + struct rspamd_http_message *msg; + + priv = conn->priv; + msg = priv->msg; + + /* Clear request 
*/ + if (msg != NULL) { + if (msg->peer_key) { + priv->peer_key = msg->peer_key; + msg->peer_key = NULL; + } + priv->msg = NULL; + } + + return msg; +} + +struct rspamd_http_message * +rspamd_http_connection_copy_msg (struct rspamd_http_message *msg, GError **err) +{ + struct rspamd_http_message *new_msg; + struct rspamd_http_header *hdr, *nhdr, *nhdrs, *hcur; + const gchar *old_body; + gsize old_len; + struct stat st; + union _rspamd_storage_u *storage; + + new_msg = rspamd_http_new_message (msg->type); + new_msg->flags = msg->flags; + + if (msg->body_buf.len > 0) { + + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + /* Avoid copying by just maping a shared segment */ + new_msg->flags |= RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE; + + storage = &new_msg->body_buf.c; + storage->shared.shm_fd = dup (msg->body_buf.c.shared.shm_fd); + + if (storage->shared.shm_fd == -1) { + rspamd_http_message_unref (new_msg); + g_set_error (err, http_error_quark (), errno, + "cannot dup shmem fd: %d: %s", + msg->body_buf.c.shared.shm_fd, strerror (errno)); + + return NULL; + } + + if (fstat (storage->shared.shm_fd, &st) == -1) { + g_set_error (err, http_error_quark (), errno, + "cannot stat shmem fd: %d: %s", + storage->shared.shm_fd, strerror (errno)); + rspamd_http_message_unref (new_msg); + + return NULL; + } + + /* We don't own segment, so do not try to touch it */ + + if (msg->body_buf.c.shared.name) { + storage->shared.name = msg->body_buf.c.shared.name; + REF_RETAIN (storage->shared.name); + } + + new_msg->body_buf.str = mmap (NULL, st.st_size, + PROT_READ, MAP_SHARED, + storage->shared.shm_fd, 0); + + if (new_msg->body_buf.str == MAP_FAILED) { + g_set_error (err, http_error_quark (), errno, + "cannot mmap shmem fd: %d: %s", + storage->shared.shm_fd, strerror (errno)); + rspamd_http_message_unref (new_msg); + + return NULL; + } + + new_msg->body_buf.begin = new_msg->body_buf.str; + new_msg->body_buf.len = msg->body_buf.len; + new_msg->body_buf.begin = new_msg->body_buf.str + + 
(msg->body_buf.begin - msg->body_buf.str); + } + else { + old_body = rspamd_http_message_get_body (msg, &old_len); + + if (!rspamd_http_message_set_body (new_msg, old_body, old_len)) { + g_set_error (err, http_error_quark (), errno, + "cannot set body for message, length: %zd", + old_len); + rspamd_http_message_unref (new_msg); + + return NULL; + } + } + } + + if (msg->url) { + if (new_msg->url) { + new_msg->url = rspamd_fstring_append (new_msg->url, msg->url->str, + msg->url->len); + } + else { + new_msg->url = rspamd_fstring_new_init (msg->url->str, + msg->url->len); + } + } + + if (msg->host) { + new_msg->host = g_string_new_len (msg->host->str, msg->host->len); + } + + new_msg->method = msg->method; + new_msg->port = msg->port; + new_msg->date = msg->date; + new_msg->last_modified = msg->last_modified; + + kh_foreach_value (msg->headers, hdr, { + nhdrs = NULL; + + DL_FOREACH (hdr, hcur) { + nhdr = g_malloc (sizeof (struct rspamd_http_header)); + + nhdr->combined = rspamd_fstring_new_init (hcur->combined->str, + hcur->combined->len); + nhdr->name.begin = nhdr->combined->str + + (hcur->name.begin - hcur->combined->str); + nhdr->name.len = hcur->name.len; + nhdr->value.begin = nhdr->combined->str + + (hcur->value.begin - hcur->combined->str); + nhdr->value.len = hcur->value.len; + DL_APPEND (nhdrs, nhdr); + } + + gint r; + khiter_t k = kh_put (rspamd_http_headers_hash, new_msg->headers, + &nhdrs->name,&r); + + if (r != 0) { + kh_value (new_msg->headers, k) = nhdrs; + } + else { + DL_CONCAT (kh_value (new_msg->headers, k), nhdrs); + } + }); + + return new_msg; +} + +void +rspamd_http_connection_free (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + if (priv != NULL) { + rspamd_http_connection_reset (conn); + + if (priv->ssl) { + rspamd_ssl_connection_free (priv->ssl); + priv->ssl = NULL; + } + + if (priv->local_key) { + rspamd_keypair_unref (priv->local_key); + } + if (priv->peer_key) { + 
rspamd_pubkey_unref (priv->peer_key); + } + + if (priv->flags & RSPAMD_HTTP_CONN_OWN_SOCKET) { + /* Fd is owned by a connection */ + close (conn->fd); + } + + g_free (priv); + } + + g_free (conn); +} + +static void +rspamd_http_connection_read_message_common (struct rspamd_http_connection *conn, + gpointer ud, ev_tstamp timeout, + gint flags) +{ + struct rspamd_http_connection_private *priv = conn->priv; + struct rspamd_http_message *req; + + conn->ud = ud; + req = rspamd_http_new_message ( + conn->type == RSPAMD_HTTP_SERVER ? HTTP_REQUEST : HTTP_RESPONSE); + priv->msg = req; + req->flags = flags; + + if (flags & RSPAMD_HTTP_FLAG_SHMEM) { + req->body_buf.c.shared.shm_fd = -1; + } + + if (priv->peer_key) { + priv->msg->peer_key = priv->peer_key; + priv->peer_key = NULL; + priv->flags |= RSPAMD_HTTP_CONN_FLAG_ENCRYPTED; + } + + priv->timeout = timeout; + priv->header = NULL; + priv->buf = g_malloc0 (sizeof (*priv->buf)); + REF_INIT_RETAIN (priv->buf, rspamd_http_privbuf_dtor); + priv->buf->data = rspamd_fstring_sized_new (8192); + priv->flags |= RSPAMD_HTTP_CONN_FLAG_NEW_HEADER; + + rspamd_ev_watcher_init (&priv->ev, conn->fd, EV_READ, + rspamd_http_event_handler, conn); + rspamd_ev_watcher_start (priv->ctx->event_loop, &priv->ev, priv->timeout); + + priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_RESETED; +} + +void +rspamd_http_connection_read_message (struct rspamd_http_connection *conn, + gpointer ud, ev_tstamp timeout) +{ + rspamd_http_connection_read_message_common (conn, ud, timeout, 0); +} + +void +rspamd_http_connection_read_message_shared (struct rspamd_http_connection *conn, + gpointer ud, ev_tstamp timeout) +{ + rspamd_http_connection_read_message_common (conn, ud, timeout, + RSPAMD_HTTP_FLAG_SHMEM); +} + +static void +rspamd_http_connection_encrypt_message ( + struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, + struct rspamd_http_connection_private *priv, + guchar *pbody, + guint bodylen, + guchar *pmethod, + guint methodlen, + guint 
preludelen, + gint hdrcount, + guchar *np, + guchar *mp, + struct rspamd_cryptobox_pubkey *peer_key) +{ + struct rspamd_cryptobox_segment *segments; + guchar *crlfp; + const guchar *nm; + gint i, cnt; + guint outlen; + struct rspamd_http_header *hdr, *hcur; + enum rspamd_cryptobox_mode mode; + + mode = rspamd_keypair_alg (priv->local_key); + crlfp = mp + rspamd_cryptobox_mac_bytes (mode); + + outlen = priv->out[0].iov_len + priv->out[1].iov_len; + /* + * Create segments from the following: + * Method, [URL], CRLF, nheaders, CRLF, body + */ + segments = g_new (struct rspamd_cryptobox_segment, hdrcount + 5); + + segments[0].data = pmethod; + segments[0].len = methodlen; + + if (conn->type != RSPAMD_HTTP_SERVER) { + segments[1].data = msg->url->str; + segments[1].len = msg->url->len; + /* space + HTTP version + crlf */ + segments[2].data = crlfp; + segments[2].len = preludelen - 2; + crlfp += segments[2].len; + i = 3; + } + else { + /* Here we send just CRLF */ + segments[1].data = crlfp; + segments[1].len = 2; + crlfp += segments[1].len; + + i = 2; + } + + + kh_foreach_value (msg->headers, hdr, { + DL_FOREACH (hdr, hcur) { + segments[i].data = hcur->combined->str; + segments[i++].len = hcur->combined->len; + } + }); + + /* crlfp should point now at the second crlf */ + segments[i].data = crlfp; + segments[i++].len = 2; + + if (pbody) { + segments[i].data = pbody; + segments[i++].len = bodylen; + } + + cnt = i; + + if ((nm = rspamd_pubkey_get_nm (peer_key, priv->local_key)) == NULL) { + nm = rspamd_pubkey_calculate_nm (peer_key, priv->local_key); + } + + rspamd_cryptobox_encryptv_nm_inplace (segments, cnt, np, nm, mp, mode); + + /* + * iov[0] = base HTTP request + * iov[1] = CRLF + * iov[2] = nonce + * iov[3] = mac + * iov[4..i] = encrypted HTTP request/reply + */ + priv->out[2].iov_base = np; + priv->out[2].iov_len = rspamd_cryptobox_nonce_bytes (mode); + priv->out[3].iov_base = mp; + priv->out[3].iov_len = rspamd_cryptobox_mac_bytes (mode); + + outlen += 
rspamd_cryptobox_nonce_bytes (mode) + + rspamd_cryptobox_mac_bytes (mode); + + for (i = 0; i < cnt; i ++) { + priv->out[i + 4].iov_base = segments[i].data; + priv->out[i + 4].iov_len = segments[i].len; + outlen += segments[i].len; + } + + priv->wr_total = outlen; + + g_free (segments); +} + +static void +rspamd_http_detach_shared (struct rspamd_http_message *msg) +{ + rspamd_fstring_t *cpy_str; + + cpy_str = rspamd_fstring_new_init (msg->body_buf.begin, msg->body_buf.len); + rspamd_http_message_set_body_from_fstring_steal (msg, cpy_str); +} + +gint +rspamd_http_message_write_header (const gchar* mime_type, gboolean encrypted, + gchar *repbuf, gsize replen, gsize bodylen, gsize enclen, const gchar* host, + struct rspamd_http_connection* conn, struct rspamd_http_message* msg, + rspamd_fstring_t** buf, + struct rspamd_http_connection_private* priv, + struct rspamd_cryptobox_pubkey* peer_key) +{ + gchar datebuf[64]; + gint meth_len = 0; + const gchar *conn_type = "close"; + + if (conn->type == RSPAMD_HTTP_SERVER) { + /* Format reply */ + if (msg->method < HTTP_SYMBOLS) { + rspamd_ftok_t status; + + rspamd_http_date_format (datebuf, sizeof (datebuf), msg->date); + + if (mime_type == NULL) { + mime_type = + encrypted ? 
"application/octet-stream" : "text/plain"; + } + + if (msg->status == NULL || msg->status->len == 0) { + if (msg->code == 200) { + RSPAMD_FTOK_ASSIGN (&status, "OK"); + } + else if (msg->code == 404) { + RSPAMD_FTOK_ASSIGN (&status, "Not Found"); + } + else if (msg->code == 403) { + RSPAMD_FTOK_ASSIGN (&status, "Forbidden"); + } + else if (msg->code >= 500 && msg->code < 600) { + RSPAMD_FTOK_ASSIGN (&status, "Internal Server Error"); + } + else { + RSPAMD_FTOK_ASSIGN (&status, "Undefined Error"); + } + } + else { + status.begin = msg->status->str; + status.len = msg->status->len; + } + + if (encrypted) { + /* Internal reply (encrypted) */ + if (mime_type) { + meth_len = + rspamd_snprintf (repbuf, replen, + "HTTP/1.1 %d %T\r\n" + "Connection: close\r\n" + "Server: %s\r\n" + "Date: %s\r\n" + "Content-Length: %z\r\n" + "Content-Type: %s", /* NO \r\n at the end ! */ + msg->code, &status, priv->ctx->config.server_hdr, + datebuf, + bodylen, mime_type); + } + else { + meth_len = + rspamd_snprintf (repbuf, replen, + "HTTP/1.1 %d %T\r\n" + "Connection: close\r\n" + "Server: %s\r\n" + "Date: %s\r\n" + "Content-Length: %z", /* NO \r\n at the end ! 
*/ + msg->code, &status, priv->ctx->config.server_hdr, + datebuf, + bodylen); + } + enclen += meth_len; + /* External reply */ + rspamd_printf_fstring (buf, + "HTTP/1.1 200 OK\r\n" + "Connection: close\r\n" + "Server: %s\r\n" + "Date: %s\r\n" + "Content-Length: %z\r\n" + "Content-Type: application/octet-stream\r\n", + priv->ctx->config.server_hdr, + datebuf, enclen); + } + else { + if (mime_type) { + meth_len = + rspamd_printf_fstring (buf, + "HTTP/1.1 %d %T\r\n" + "Connection: close\r\n" + "Server: %s\r\n" + "Date: %s\r\n" + "Content-Length: %z\r\n" + "Content-Type: %s\r\n", + msg->code, &status, priv->ctx->config.server_hdr, + datebuf, + bodylen, mime_type); + } + else { + meth_len = + rspamd_printf_fstring (buf, + "HTTP/1.1 %d %T\r\n" + "Connection: close\r\n" + "Server: %s\r\n" + "Date: %s\r\n" + "Content-Length: %z\r\n", + msg->code, &status, priv->ctx->config.server_hdr, + datebuf, + bodylen); + } + } + } + else { + /* Legacy spamd reply */ + if (msg->flags & RSPAMD_HTTP_FLAG_SPAMC) { + gsize real_bodylen; + goffset eoh_pos; + GString tmp; + + /* Unfortunately, spamc protocol is deadly brain damaged */ + tmp.str = (gchar *)msg->body_buf.begin; + tmp.len = msg->body_buf.len; + + if (rspamd_string_find_eoh (&tmp, &eoh_pos) != -1 && + bodylen > eoh_pos) { + real_bodylen = bodylen - eoh_pos; + } + else { + real_bodylen = bodylen; + } + + rspamd_printf_fstring (buf, "SPAMD/1.1 0 EX_OK\r\n" + "Content-length: %z\r\n", + real_bodylen); + } + else { + rspamd_printf_fstring (buf, "RSPAMD/1.3 0 EX_OK\r\n"); + } + } + } + else { + + /* Client request */ + if (conn->opts & RSPAMD_HTTP_CLIENT_KEEP_ALIVE) { + conn_type = "keep-alive"; + } + + /* Format request */ + enclen += RSPAMD_FSTRING_LEN (msg->url) + + strlen (http_method_str (msg->method)) + 1; + + if (host == NULL && msg->host == NULL) { + /* Fallback to HTTP/1.0 */ + if (encrypted) { + rspamd_printf_fstring (buf, + "%s %s HTTP/1.0\r\n" + "Content-Length: %z\r\n" + "Content-Type: application/octet-stream\r\n" + 
"Connection: %s\r\n", + "POST", + "/post", + enclen, + conn_type); + } + else { + rspamd_printf_fstring (buf, + "%s %V HTTP/1.0\r\n" + "Content-Length: %z\r\n" + "Connection: %s\r\n", + http_method_str (msg->method), + msg->url, + bodylen, + conn_type); + + if (bodylen > 0) { + if (mime_type == NULL) { + mime_type = "text/plain"; + } + + rspamd_printf_fstring (buf, + "Content-Type: %s\r\n", + mime_type); + } + } + } + else { + /* Normal HTTP/1.1 with Host */ + if (host == NULL) { + host = msg->host->str; + } + + if (encrypted) { + /* TODO: Add proxy support to HTTPCrypt */ + rspamd_printf_fstring (buf, + "%s %s HTTP/1.1\r\n" + "Connection: %s\r\n" + "Host: %s\r\n" + "Content-Length: %z\r\n" + "Content-Type: application/octet-stream\r\n", + "POST", + "/post", + conn_type, + host, + enclen); + } + else { + if (conn->priv->flags & RSPAMD_HTTP_CONN_FLAG_PROXY) { + rspamd_printf_fstring (buf, + "%s %s://%s:%d/%V HTTP/1.1\r\n" + "Connection: %s\r\n" + "Host: %s\r\n" + "Content-Length: %z\r\n", + http_method_str (msg->method), + (msg->flags & RSPAMD_HTTP_FLAG_SSL) ? 
"https" : "http", + host, + msg->port, + msg->url, + conn_type, + host, + bodylen); + } + else { + rspamd_printf_fstring (buf, + "%s %V HTTP/1.1\r\n" + "Connection: %s\r\n" + "Host: %s\r\n" + "Content-Length: %z\r\n", + http_method_str (msg->method), + msg->url, + conn_type, + host, + bodylen); + } + + if (bodylen > 0) { + if (mime_type != NULL) { + rspamd_printf_fstring (buf, + "Content-Type: %s\r\n", + mime_type); + } + } + } + } + + if (encrypted) { + GString *b32_key, *b32_id; + + b32_key = rspamd_keypair_print (priv->local_key, + RSPAMD_KEYPAIR_PUBKEY | RSPAMD_KEYPAIR_BASE32); + b32_id = rspamd_pubkey_print (peer_key, + RSPAMD_KEYPAIR_ID_SHORT | RSPAMD_KEYPAIR_BASE32); + /* XXX: add some fuzz here */ + rspamd_printf_fstring (&*buf, "Key: %v=%v\r\n", b32_id, b32_key); + g_string_free (b32_key, TRUE); + g_string_free (b32_id, TRUE); + } + } + + return meth_len; +} + +static gboolean +rspamd_http_connection_write_message_common (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, + const gchar *host, + const gchar *mime_type, + gpointer ud, + ev_tstamp timeout, + gboolean allow_shared) +{ + struct rspamd_http_connection_private *priv = conn->priv; + struct rspamd_http_header *hdr, *hcur; + gchar repbuf[512], *pbody; + gint i, hdrcount, meth_len = 0, preludelen = 0; + gsize bodylen, enclen = 0; + rspamd_fstring_t *buf; + gboolean encrypted = FALSE; + guchar nonce[rspamd_cryptobox_MAX_NONCEBYTES], mac[rspamd_cryptobox_MAX_MACBYTES]; + guchar *np = NULL, *mp = NULL, *meth_pos = NULL; + struct rspamd_cryptobox_pubkey *peer_key = NULL; + enum rspamd_cryptobox_mode mode; + GError *err; + + conn->ud = ud; + priv->msg = msg; + priv->timeout = timeout; + + priv->header = NULL; + priv->buf = g_malloc0 (sizeof (*priv->buf)); + REF_INIT_RETAIN (priv->buf, rspamd_http_privbuf_dtor); + priv->buf->data = rspamd_fstring_sized_new (512); + buf = priv->buf->data; + + if (priv->peer_key && priv->local_key) { + priv->msg->peer_key = priv->peer_key; + 
priv->peer_key = NULL; + priv->flags |= RSPAMD_HTTP_CONN_FLAG_ENCRYPTED; + } + + if (msg->peer_key != NULL) { + if (priv->local_key == NULL) { + /* Automatically generate a temporary keypair */ + priv->local_key = rspamd_keypair_new (RSPAMD_KEYPAIR_KEX, + RSPAMD_CRYPTOBOX_MODE_25519); + } + + encrypted = TRUE; + + if (priv->cache) { + rspamd_keypair_cache_process (priv->cache, + priv->local_key, priv->msg->peer_key); + } + } + + if (encrypted && (msg->flags & + (RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE|RSPAMD_HTTP_FLAG_SHMEM))) { + /* We cannot use immutable body to encrypt message in place */ + allow_shared = FALSE; + rspamd_http_detach_shared (msg); + } + + if (allow_shared) { + gchar tmpbuf[64]; + + if (!(msg->flags & RSPAMD_HTTP_FLAG_SHMEM) || + msg->body_buf.c.shared.name == NULL) { + allow_shared = FALSE; + } + else { + /* Insert new headers */ + rspamd_http_message_add_header (msg, "Shm", + msg->body_buf.c.shared.name->shm_name); + rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "%d", + (int)(msg->body_buf.begin - msg->body_buf.str)); + rspamd_http_message_add_header (msg, "Shm-Offset", + tmpbuf); + rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "%z", + msg->body_buf.len); + rspamd_http_message_add_header (msg, "Shm-Length", + tmpbuf); + } + } + + if (priv->ctx->config.user_agent && conn->type == RSPAMD_HTTP_CLIENT) { + rspamd_ftok_t srch; + khiter_t k; + gint r; + + RSPAMD_FTOK_ASSIGN (&srch, "User-Agent"); + + k = kh_put (rspamd_http_headers_hash, msg->headers, &srch,&r); + + if (r != 0) { + hdr = g_malloc0 (sizeof (struct rspamd_http_header)); + guint vlen = strlen (priv->ctx->config.user_agent); + hdr->combined = rspamd_fstring_sized_new (srch.len + vlen + 4); + rspamd_printf_fstring (&hdr->combined, "%T: %*s\r\n", &srch, vlen, + priv->ctx->config.user_agent); + hdr->name.begin = hdr->combined->str; + hdr->name.len = srch.len; + hdr->value.begin = hdr->combined->str + srch.len + 2; + hdr->value.len = vlen; + hdr->prev = hdr; /* for utlists */ + + kh_value (msg->headers, k) 
= hdr; + /* as we searched using static buffer */ + kh_key (msg->headers, k) = &hdr->name; + } + } + + if (encrypted) { + mode = rspamd_keypair_alg (priv->local_key); + + if (msg->body_buf.len == 0) { + pbody = NULL; + bodylen = 0; + msg->method = HTTP_GET; + } + else { + pbody = (gchar *)msg->body_buf.begin; + bodylen = msg->body_buf.len; + msg->method = HTTP_POST; + } + + if (conn->type == RSPAMD_HTTP_SERVER) { + /* + * iov[0] = base reply + * iov[1] = CRLF + * iov[2] = nonce + * iov[3] = mac + * iov[4] = encrypted reply + * iov[6] = encrypted crlf + * iov[7..n] = encrypted headers + * iov[n + 1] = encrypted crlf + * [iov[n + 2] = encrypted body] + */ + priv->outlen = 7; + enclen = rspamd_cryptobox_nonce_bytes (mode) + + rspamd_cryptobox_mac_bytes (mode) + + 4 + /* 2 * CRLF */ + bodylen; + } + else { + /* + * iov[0] = base request + * iov[1] = CRLF + * iov[2] = nonce + * iov[3] = mac + * iov[4] = encrypted method + space + * iov[5] = encrypted url + * iov[7] = encrypted prelude + * iov[8..n] = encrypted headers + * iov[n + 1] = encrypted crlf + * [iov[n + 2] = encrypted body] + */ + priv->outlen = 8; + + if (bodylen > 0) { + if (mime_type != NULL) { + preludelen = rspamd_snprintf (repbuf, sizeof (repbuf), "%s\r\n" + "Content-Length: %z\r\n" + "Content-Type: %s\r\n" + "\r\n", ENCRYPTED_VERSION, bodylen, + mime_type); + } + else { + preludelen = rspamd_snprintf (repbuf, sizeof (repbuf), "%s\r\n" + "Content-Length: %z\r\n" + "" + "\r\n", ENCRYPTED_VERSION, bodylen); + } + } + else { + preludelen = rspamd_snprintf (repbuf, sizeof (repbuf), + "%s\r\n\r\n", + ENCRYPTED_VERSION); + } + + enclen = rspamd_cryptobox_nonce_bytes (mode) + + rspamd_cryptobox_mac_bytes (mode) + + preludelen + /* version [content-length] + 2 * CRLF */ + bodylen; + } + + if (bodylen > 0) { + priv->outlen ++; + } + } + else { + if (msg->method < HTTP_SYMBOLS) { + if (msg->body_buf.len == 0 || allow_shared) { + pbody = NULL; + bodylen = 0; + priv->outlen = 2; + + if (msg->method == HTTP_INVALID) { 
+ msg->method = HTTP_GET; + } + } + else { + pbody = (gchar *)msg->body_buf.begin; + bodylen = msg->body_buf.len; + priv->outlen = 3; + + if (msg->method == HTTP_INVALID) { + msg->method = HTTP_POST; + } + } + } + else if (msg->body_buf.len > 0) { + allow_shared = FALSE; + pbody = (gchar *)msg->body_buf.begin; + bodylen = msg->body_buf.len; + priv->outlen = 2; + } + else { + /* Invalid body for spamc method */ + abort (); + } + } + + peer_key = msg->peer_key; + + priv->wr_total = bodylen + 2; + + hdrcount = 0; + + if (msg->method < HTTP_SYMBOLS) { + kh_foreach_value (msg->headers, hdr, { + DL_FOREACH (hdr, hcur) { + /* <name: value\r\n> */ + priv->wr_total += hcur->combined->len; + enclen += hcur->combined->len; + priv->outlen ++; + hdrcount ++; + } + }); + } + + /* Allocate iov */ + priv->out = g_malloc0 (sizeof (struct iovec) * priv->outlen); + priv->wr_pos = 0; + + meth_len = rspamd_http_message_write_header (mime_type, encrypted, + repbuf, sizeof (repbuf), bodylen, enclen, + host, conn, msg, + &buf, priv, peer_key); + priv->wr_total += buf->len; + + /* Setup external request body */ + priv->out[0].iov_base = buf->str; + priv->out[0].iov_len = buf->len; + + /* Buf will be used eventually for encryption */ + if (encrypted) { + gint meth_offset, nonce_offset, mac_offset; + mode = rspamd_keypair_alg (priv->local_key); + + ottery_rand_bytes (nonce, rspamd_cryptobox_nonce_bytes (mode)); + memset (mac, 0, rspamd_cryptobox_mac_bytes (mode)); + meth_offset = buf->len; + + if (conn->type == RSPAMD_HTTP_SERVER) { + buf = rspamd_fstring_append (buf, repbuf, meth_len); + } + else { + meth_len = strlen (http_method_str (msg->method)) + 1; /* + space */ + buf = rspamd_fstring_append (buf, http_method_str (msg->method), + meth_len - 1); + buf = rspamd_fstring_append (buf, " ", 1); + } + + nonce_offset = buf->len; + buf = rspamd_fstring_append (buf, nonce, + rspamd_cryptobox_nonce_bytes (mode)); + mac_offset = buf->len; + buf = rspamd_fstring_append (buf, mac, + 
rspamd_cryptobox_mac_bytes (mode)); + + /* Need to be encrypted */ + if (conn->type == RSPAMD_HTTP_SERVER) { + buf = rspamd_fstring_append (buf, "\r\n\r\n", 4); + } + else { + buf = rspamd_fstring_append (buf, repbuf, preludelen); + } + + meth_pos = buf->str + meth_offset; + np = buf->str + nonce_offset; + mp = buf->str + mac_offset; + } + + /* During previous writes, buf might be reallocated and changed */ + priv->buf->data = buf; + + if (encrypted) { + /* Finish external HTTP request */ + priv->out[1].iov_base = "\r\n"; + priv->out[1].iov_len = 2; + /* Encrypt the real request */ + rspamd_http_connection_encrypt_message (conn, msg, priv, pbody, bodylen, + meth_pos, meth_len, preludelen, hdrcount, np, mp, peer_key); + } + else { + i = 1; + if (msg->method < HTTP_SYMBOLS) { + kh_foreach_value (msg->headers, hdr, { + DL_FOREACH (hdr, hcur) { + priv->out[i].iov_base = hcur->combined->str; + priv->out[i++].iov_len = hcur->combined->len; + } + }); + + priv->out[i].iov_base = "\r\n"; + priv->out[i++].iov_len = 2; + } + else { + /* No CRLF for compatibility reply */ + priv->wr_total -= 2; + } + + if (pbody != NULL) { + priv->out[i].iov_base = pbody; + priv->out[i++].iov_len = bodylen; + } + } + + priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_RESETED; + + if (priv->flags & RSPAMD_HTTP_CONN_FLAG_PROXY) { + /* We need to disable SSL flag! */ + msg->flags &=~ RSPAMD_HTTP_FLAG_SSL; + } + + rspamd_ev_watcher_stop (priv->ctx->event_loop, &priv->ev); + + if (msg->flags & RSPAMD_HTTP_FLAG_SSL) { + gpointer ssl_ctx = (msg->flags & RSPAMD_HTTP_FLAG_SSL_NOVERIFY) ? 
+ priv->ctx->ssl_ctx_noverify : priv->ctx->ssl_ctx; + + if (!ssl_ctx) { + err = g_error_new (HTTP_ERROR, errno, "ssl message requested " + "with no ssl ctx"); + rspamd_http_connection_ref (conn); + conn->error_handler (conn, err); + rspamd_http_connection_unref (conn); + g_error_free (err); + return FALSE; + } + else { + if (priv->ssl) { + /* Cleanup the existing connection */ + rspamd_ssl_connection_free (priv->ssl); + } + + priv->ssl = rspamd_ssl_connection_new (ssl_ctx, priv->ctx->event_loop, + !(msg->flags & RSPAMD_HTTP_FLAG_SSL_NOVERIFY), + conn->log_tag); + g_assert (priv->ssl != NULL); + + if (!rspamd_ssl_connect_fd (priv->ssl, conn->fd, host, &priv->ev, + priv->timeout, rspamd_http_event_handler, + rspamd_http_ssl_err_handler, conn)) { + + err = g_error_new (HTTP_ERROR, errno, + "ssl connection error: ssl error=%s, errno=%s", + ERR_error_string (ERR_get_error (), NULL), + strerror (errno)); + rspamd_http_connection_ref (conn); + conn->error_handler (conn, err); + rspamd_http_connection_unref (conn); + g_error_free (err); + return FALSE; + } + } + } + else { + rspamd_ev_watcher_init (&priv->ev, conn->fd, EV_WRITE, + rspamd_http_event_handler, conn); + rspamd_ev_watcher_start (priv->ctx->event_loop, &priv->ev, priv->timeout); + } + + return TRUE; +} + +gboolean +rspamd_http_connection_write_message (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, + const gchar *host, + const gchar *mime_type, + gpointer ud, + ev_tstamp timeout) +{ + return rspamd_http_connection_write_message_common (conn, msg, host, mime_type, + ud, timeout, FALSE); +} + +gboolean +rspamd_http_connection_write_message_shared (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, + const gchar *host, + const gchar *mime_type, + gpointer ud, + ev_tstamp timeout) +{ + return rspamd_http_connection_write_message_common (conn, msg, host, mime_type, + ud, timeout, TRUE); +} + + +void +rspamd_http_connection_set_max_size (struct rspamd_http_connection 
*conn, + gsize sz) +{ + conn->max_size = sz; +} + +void +rspamd_http_connection_set_key (struct rspamd_http_connection *conn, + struct rspamd_cryptobox_keypair *key) +{ + struct rspamd_http_connection_private *priv = conn->priv; + + g_assert (key != NULL); + priv->local_key = rspamd_keypair_ref (key); +} + +const struct rspamd_cryptobox_pubkey* +rspamd_http_connection_get_peer_key (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv = conn->priv; + + if (priv->peer_key) { + return priv->peer_key; + } + else if (priv->msg) { + return priv->msg->peer_key; + } + + return NULL; +} + +gboolean +rspamd_http_connection_is_encrypted (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv = conn->priv; + + if (priv->peer_key != NULL) { + return TRUE; + } + else if (priv->msg) { + return priv->msg->peer_key != NULL; + } + + return FALSE; +} + +GHashTable * +rspamd_http_message_parse_query (struct rspamd_http_message *msg) +{ + GHashTable *res; + rspamd_fstring_t *key = NULL, *value = NULL; + rspamd_ftok_t *key_tok = NULL, *value_tok = NULL; + const gchar *p, *c, *end; + struct http_parser_url u; + enum { + parse_key, + parse_eqsign, + parse_value, + parse_ampersand + } state = parse_key; + + res = g_hash_table_new_full (rspamd_ftok_icase_hash, + rspamd_ftok_icase_equal, + rspamd_fstring_mapped_ftok_free, + rspamd_fstring_mapped_ftok_free); + + if (msg->url && msg->url->len > 0) { + http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u); + + if (u.field_set & (1 << UF_QUERY)) { + p = msg->url->str + u.field_data[UF_QUERY].off; + c = p; + end = p + u.field_data[UF_QUERY].len; + + while (p <= end) { + switch (state) { + case parse_key: + if ((p == end || *p == '&') && p > c) { + /* We have a single parameter without a value */ + key = rspamd_fstring_new_init (c, p - c); + key_tok = rspamd_ftok_map (key); + key_tok->len = rspamd_url_decode (key->str, key->str, + key->len); + + value = 
rspamd_fstring_new_init ("", 0); + value_tok = rspamd_ftok_map (value); + + g_hash_table_replace (res, key_tok, value_tok); + state = parse_ampersand; + } + else if (*p == '=' && p > c) { + /* We have something like key=value */ + key = rspamd_fstring_new_init (c, p - c); + key_tok = rspamd_ftok_map (key); + key_tok->len = rspamd_url_decode (key->str, key->str, + key->len); + + state = parse_eqsign; + } + else { + p ++; + } + break; + + case parse_eqsign: + if (*p != '=') { + c = p; + state = parse_value; + } + else { + p ++; + } + break; + + case parse_value: + if ((p == end || *p == '&') && p >= c) { + g_assert (key != NULL); + if (p > c) { + value = rspamd_fstring_new_init (c, p - c); + value_tok = rspamd_ftok_map (value); + value_tok->len = rspamd_url_decode (value->str, + value->str, + value->len); + /* Detect quotes for value */ + if (value_tok->begin[0] == '"') { + memmove (value->str, value->str + 1, + value_tok->len - 1); + value_tok->len --; + } + if (value_tok->begin[value_tok->len - 1] == '"') { + value_tok->len --; + } + } + else { + value = rspamd_fstring_new_init ("", 0); + value_tok = rspamd_ftok_map (value); + } + + g_hash_table_replace (res, key_tok, value_tok); + key = value = NULL; + key_tok = value_tok = NULL; + state = parse_ampersand; + } + else { + p ++; + } + break; + + case parse_ampersand: + if (p != end && *p != '&') { + c = p; + state = parse_key; + } + else { + p ++; + } + break; + } + } + } + + if (state != parse_ampersand && key != NULL) { + rspamd_fstring_free (key); + } + } + + return res; +} + + +struct rspamd_http_message * +rspamd_http_message_ref (struct rspamd_http_message *msg) +{ + REF_RETAIN (msg); + + return msg; +} + +void +rspamd_http_message_unref (struct rspamd_http_message *msg) +{ + REF_RELEASE (msg); +} + +void +rspamd_http_connection_disable_encryption (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + if (priv) { + if (priv->local_key) { + 
rspamd_keypair_unref (priv->local_key); + } + if (priv->peer_key) { + rspamd_pubkey_unref (priv->peer_key); + } + + priv->local_key = NULL; + priv->peer_key = NULL; + priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_ENCRYPTED; + } +}
\ No newline at end of file diff --git a/src/libserver/http/http_connection.h b/src/libserver/http/http_connection.h new file mode 100644 index 000000000..7c901fd2a --- /dev/null +++ b/src/libserver/http/http_connection.h @@ -0,0 +1,306 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef HTTP_H_ +#define HTTP_H_ + +/** + * @file http_connection.h + * + * This is an interface for HTTP client and server connections. + * This code uses HTTP parser written by Joyent Inc based on nginx code. 
+ */ + +#include "config.h" +#include "http_context.h" +#include "fstring.h" +#include "ref.h" +#include "http_message.h" +#include "http_util.h" +#include "addr.h" + +#include "contrib/libev/ev.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum rspamd_http_connection_type { + RSPAMD_HTTP_SERVER, + RSPAMD_HTTP_CLIENT +}; + +struct rspamd_http_header; +struct rspamd_http_message; +struct rspamd_http_connection_private; +struct rspamd_http_connection; +struct rspamd_http_connection_router; +struct rspamd_http_connection_entry; +struct rspamd_keepalive_hash_key; + +struct rspamd_storage_shmem { + gchar *shm_name; + ref_entry_t ref; +}; + +/** + * Legacy spamc protocol + */ +#define RSPAMD_HTTP_FLAG_SPAMC (1 << 0) +/** + * Store body of the message in a shared memory segment + */ +#define RSPAMD_HTTP_FLAG_SHMEM (1 << 2) +/** + * Store body of the message in an immutable shared memory segment + */ +#define RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE (1 << 3) +/** + * Use tls for this message + */ +#define RSPAMD_HTTP_FLAG_SSL (1 << 4) +/** + * Body has been set for a message + */ +#define RSPAMD_HTTP_FLAG_HAS_BODY (1 << 5) +/** + * Do not verify server's certificate + */ +#define RSPAMD_HTTP_FLAG_SSL_NOVERIFY (1 << 6) +/** + * Options for HTTP connection + */ +enum rspamd_http_options { + RSPAMD_HTTP_BODY_PARTIAL = 1, /**< Call body handler on all body data portions */ + RSPAMD_HTTP_CLIENT_SIMPLE = 1u << 1, /**< Read HTTP client reply automatically */ + RSPAMD_HTTP_CLIENT_ENCRYPTED = 1u << 2, /**< Encrypt data for client */ + RSPAMD_HTTP_CLIENT_SHARED = 1u << 3, /**< Store reply in shared memory */ + RSPAMD_HTTP_REQUIRE_ENCRYPTION = 1u << 4, + RSPAMD_HTTP_CLIENT_KEEP_ALIVE = 1u << 5, +}; + +typedef int (*rspamd_http_body_handler_t) (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, + const gchar *chunk, + gsize len); + +typedef void (*rspamd_http_error_handler_t) (struct rspamd_http_connection *conn, + GError *err); + +typedef int 
(*rspamd_http_finish_handler_t) (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg); + +/** + * HTTP connection structure + */ +struct rspamd_http_connection { + struct rspamd_http_connection_private *priv; + rspamd_http_body_handler_t body_handler; + rspamd_http_error_handler_t error_handler; + rspamd_http_finish_handler_t finish_handler; + gpointer ud; + const gchar *log_tag; + /* Used for keepalive */ + struct rspamd_keepalive_hash_key *keepalive_hash_key; + gsize max_size; + unsigned opts; + enum rspamd_http_connection_type type; + gboolean finished; + gint fd; + gint ref; +}; + +/** + * Creates a new HTTP server connection from an opened FD returned by accept function + * @param ctx + * @param fd + * @param body_handler + * @param error_handler + * @param finish_handler + * @param opts + * @return + */ +struct rspamd_http_connection *rspamd_http_connection_new_server ( + struct rspamd_http_context *ctx, + gint fd, + rspamd_http_body_handler_t body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + unsigned opts); + +/** + * Creates or reuses a new keepalive client connection identified by hostname and inet_addr + * @param ctx + * @param body_handler + * @param error_handler + * @param finish_handler + * @param addr + * @param host + * @return + */ +struct rspamd_http_connection *rspamd_http_connection_new_keepalive ( + struct rspamd_http_context *ctx, + rspamd_http_body_handler_t body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + rspamd_inet_addr_t *addr, + const gchar *host); + +/** + * Creates an ordinary connection using the address specified (if proxy is not set) + * @param ctx + * @param body_handler + * @param error_handler + * @param finish_handler + * @param opts + * @param addr + * @return + */ +struct rspamd_http_connection *rspamd_http_connection_new_client ( + struct rspamd_http_context *ctx, + rspamd_http_body_handler_t 
body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + unsigned opts, + rspamd_inet_addr_t *addr); + +/** + * Creates an ordinary client connection using ready file descriptor (ignores proxy) + * @param ctx + * @param body_handler + * @param error_handler + * @param finish_handler + * @param opts + * @param addr + * @return + */ +struct rspamd_http_connection *rspamd_http_connection_new_client_socket ( + struct rspamd_http_context *ctx, + rspamd_http_body_handler_t body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + unsigned opts, + gint fd); + +/** + * Set key pointed by an opaque pointer + * @param conn connection structure + * @param key opaque key structure + */ +void rspamd_http_connection_set_key (struct rspamd_http_connection *conn, + struct rspamd_cryptobox_keypair *key); + +/** + * Get peer's public key + * @param conn connection structure + * @return pubkey structure or NULL + */ +const struct rspamd_cryptobox_pubkey *rspamd_http_connection_get_peer_key ( + struct rspamd_http_connection *conn); + +/** + * Returns TRUE if a connection is encrypted + * @param conn + * @return + */ +gboolean rspamd_http_connection_is_encrypted (struct rspamd_http_connection *conn); + +/** + * Handle a request using socket fd and user data ud + * @param conn connection structure + * @param ud opaque user data + * @param fd fd to read/write + */ +void rspamd_http_connection_read_message ( + struct rspamd_http_connection *conn, + gpointer ud, + ev_tstamp timeout); + +void rspamd_http_connection_read_message_shared ( + struct rspamd_http_connection *conn, + gpointer ud, + ev_tstamp timeout); + +/** + * Send reply using initialised connection + * @param conn connection structure + * @param msg HTTP message + * @param ud opaque user data + * @param fd fd to read/write + */ +gboolean rspamd_http_connection_write_message ( + struct rspamd_http_connection *conn, + struct 
rspamd_http_message *msg, + const gchar *host, + const gchar *mime_type, + gpointer ud, + ev_tstamp timeout); + +gboolean rspamd_http_connection_write_message_shared ( + struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, + const gchar *host, + const gchar *mime_type, + gpointer ud, + ev_tstamp timeout); + +/** + * Free connection structure + * @param conn + */ +void rspamd_http_connection_free (struct rspamd_http_connection *conn); + +/** + * Increase refcount for a connection + * @param conn + * @return + */ +static inline struct rspamd_http_connection * +rspamd_http_connection_ref (struct rspamd_http_connection *conn) { + conn->ref++; + return conn; +} + +/** + * Decrease a refcount for a connection and free it if refcount is equal to zero + * @param conn + */ +static void +rspamd_http_connection_unref (struct rspamd_http_connection *conn) { + if (--conn->ref <= 0) { + rspamd_http_connection_free (conn); + } +} + +/** + * Reset connection for a new request + * @param conn + */ +void rspamd_http_connection_reset (struct rspamd_http_connection *conn); + +/** + * Sets global maximum size for HTTP message being processed + * @param sz + */ +void rspamd_http_connection_set_max_size (struct rspamd_http_connection *conn, + gsize sz); + +void rspamd_http_connection_disable_encryption (struct rspamd_http_connection *conn); + +#ifdef __cplusplus +} +#endif + +#endif /* HTTP_H_ */ diff --git a/src/libserver/http/http_context.c b/src/libserver/http/http_context.c new file mode 100644 index 000000000..d7e530d56 --- /dev/null +++ b/src/libserver/http/http_context.c @@ -0,0 +1,585 @@ +/*- + * Copyright 2019 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <contrib/http-parser/http_parser.h> +#include "http_context.h" +#include "http_private.h" +#include "keypair.h" +#include "keypairs_cache.h" +#include "cfg_file.h" +#include "contrib/libottery/ottery.h" +#include "contrib/http-parser/http_parser.h" +#include "rspamd.h" +#include "libev_helper.h" + +INIT_LOG_MODULE(http_context) + +#define msg_debug_http_context(...) rspamd_conditional_debug_fast (NULL, NULL, \ + rspamd_http_context_log_id, "http_context", NULL, \ + G_STRFUNC, \ + __VA_ARGS__) + +static struct rspamd_http_context *default_ctx = NULL; + +struct rspamd_http_keepalive_cbdata { + struct rspamd_http_connection *conn; + struct rspamd_http_context *ctx; + GQueue *queue; + GList *link; + struct rspamd_io_ev ev; +}; + +static void +rspamd_http_keepalive_queue_cleanup (GQueue *conns) +{ + GList *cur; + + cur = conns->head; + + while (cur) { + struct rspamd_http_keepalive_cbdata *cbd; + + cbd = (struct rspamd_http_keepalive_cbdata *)cur->data; + rspamd_http_connection_unref (cbd->conn); + rspamd_ev_watcher_stop (cbd->ctx->event_loop, &cbd->ev); + g_free (cbd); + + cur = cur->next; + } + + g_queue_clear (conns); +} + +static void +rspamd_http_context_client_rotate_ev (struct ev_loop *loop, ev_timer *w, int revents) +{ + struct rspamd_http_context *ctx = (struct rspamd_http_context *)w->data; + gpointer kp; + + w->repeat = rspamd_time_jitter (ctx->config.client_key_rotate_time, 0); + msg_debug_http_context ("rotate local keypair, next rotate in %.0f seconds", + w->repeat); + + ev_timer_again (loop, w); + + kp = ctx->client_kp; + 
ctx->client_kp = rspamd_keypair_new (RSPAMD_KEYPAIR_KEX, + RSPAMD_CRYPTOBOX_MODE_25519); + rspamd_keypair_unref (kp); +} + +static struct rspamd_http_context* +rspamd_http_context_new_default (struct rspamd_config *cfg, + struct ev_loop *ev_base, + struct upstream_ctx *ups_ctx) +{ + struct rspamd_http_context *ctx; + + static const int default_kp_size = 1024; + static const gdouble default_rotate_time = 120; + static const gdouble default_keepalive_interval = 65; + static const gchar *default_user_agent = "rspamd-" RSPAMD_VERSION_FULL; + static const gchar *default_server_hdr = "rspamd/" RSPAMD_VERSION_FULL; + + ctx = g_malloc0 (sizeof (*ctx)); + ctx->config.kp_cache_size_client = default_kp_size; + ctx->config.kp_cache_size_server = default_kp_size; + ctx->config.client_key_rotate_time = default_rotate_time; + ctx->config.user_agent = default_user_agent; + ctx->config.keepalive_interval = default_keepalive_interval; + ctx->config.server_hdr = default_server_hdr; + ctx->ups_ctx = ups_ctx; + + if (cfg) { + ctx->ssl_ctx = cfg->libs_ctx->ssl_ctx; + ctx->ssl_ctx_noverify = cfg->libs_ctx->ssl_ctx_noverify; + } + else { + ctx->ssl_ctx = rspamd_init_ssl_ctx (); + ctx->ssl_ctx_noverify = rspamd_init_ssl_ctx_noverify (); + } + + ctx->event_loop = ev_base; + + ctx->keep_alive_hash = kh_init (rspamd_keep_alive_hash); + + return ctx; +} + +static void +rspamd_http_context_parse_proxy (struct rspamd_http_context *ctx, + const gchar *name, + struct upstream_list **pls) +{ + struct http_parser_url u; + struct upstream_list *uls; + + if (!ctx->ups_ctx) { + msg_err ("cannot parse http_proxy %s - upstreams context is udefined", name); + return; + } + + memset (&u, 0, sizeof (u)); + + if (http_parser_parse_url (name, strlen (name), 1, &u) == 0) { + if (!(u.field_set & (1u << UF_HOST)) || u.port == 0) { + msg_err ("cannot parse http(s) proxy %s - invalid host or port", name); + + return; + } + + uls = rspamd_upstreams_create (ctx->ups_ctx); + + if (!rspamd_upstreams_parse_line_len 
(uls, + name + u.field_data[UF_HOST].off, + u.field_data[UF_HOST].len, u.port, NULL)) { + msg_err ("cannot parse http(s) proxy %s - invalid data", name); + + rspamd_upstreams_destroy (uls); + } + else { + *pls = uls; + msg_info ("set http(s) proxy to %s", name); + } + } + else { + uls = rspamd_upstreams_create (ctx->ups_ctx); + + if (!rspamd_upstreams_parse_line (uls, + name, 3128, NULL)) { + msg_err ("cannot parse http(s) proxy %s - invalid data", name); + + rspamd_upstreams_destroy (uls); + } + else { + *pls = uls; + msg_info ("set http(s) proxy to %s", name); + } + } +} + +static void +rspamd_http_context_init (struct rspamd_http_context *ctx) +{ + if (ctx->config.kp_cache_size_client > 0) { + ctx->client_kp_cache = rspamd_keypair_cache_new (ctx->config.kp_cache_size_client); + } + + if (ctx->config.kp_cache_size_server > 0) { + ctx->server_kp_cache = rspamd_keypair_cache_new (ctx->config.kp_cache_size_server); + } + + if (ctx->config.client_key_rotate_time > 0 && ctx->event_loop) { + double jittered = rspamd_time_jitter (ctx->config.client_key_rotate_time, + 0); + + ev_timer_init (&ctx->client_rotate_ev, + rspamd_http_context_client_rotate_ev, jittered, 0); + ev_timer_start (ctx->event_loop, &ctx->client_rotate_ev); + ctx->client_rotate_ev.data = ctx; + } + + if (ctx->config.http_proxy) { + rspamd_http_context_parse_proxy (ctx, ctx->config.http_proxy, + &ctx->http_proxies); + } + + default_ctx = ctx; +} + +struct rspamd_http_context* +rspamd_http_context_create (struct rspamd_config *cfg, + struct ev_loop *ev_base, + struct upstream_ctx *ups_ctx) +{ + struct rspamd_http_context *ctx; + const ucl_object_t *http_obj; + + ctx = rspamd_http_context_new_default (cfg, ev_base, ups_ctx); + http_obj = ucl_object_lookup (cfg->rcl_obj, "http"); + + if (http_obj) { + const ucl_object_t *server_obj, *client_obj; + + client_obj = ucl_object_lookup (http_obj, "client"); + + if (client_obj) { + const ucl_object_t *kp_size; + + kp_size = ucl_object_lookup (client_obj, 
"cache_size"); + + if (kp_size) { + ctx->config.kp_cache_size_client = ucl_object_toint (kp_size); + } + + const ucl_object_t *rotate_time; + + rotate_time = ucl_object_lookup (client_obj, "rotate_time"); + + if (rotate_time) { + ctx->config.client_key_rotate_time = ucl_object_todouble (rotate_time); + } + + const ucl_object_t *user_agent; + + user_agent = ucl_object_lookup (client_obj, "user_agent"); + + if (user_agent) { + ctx->config.user_agent = ucl_object_tostring (user_agent); + + if (ctx->config.user_agent && strlen (ctx->config.user_agent) == 0) { + ctx->config.user_agent = NULL; + } + } + + const ucl_object_t *server_hdr; + server_hdr = ucl_object_lookup (client_obj, "server_hdr"); + + if (server_hdr) { + ctx->config.server_hdr = ucl_object_tostring (server_hdr); + + if (ctx->config.server_hdr && strlen (ctx->config.server_hdr) == 0) { + ctx->config.server_hdr = ""; + } + } + + const ucl_object_t *keepalive_interval; + + keepalive_interval = ucl_object_lookup (client_obj, "keepalive_interval"); + + if (keepalive_interval) { + ctx->config.keepalive_interval = ucl_object_todouble (keepalive_interval); + } + + const ucl_object_t *http_proxy; + http_proxy = ucl_object_lookup (client_obj, "http_proxy"); + + if (http_proxy) { + ctx->config.http_proxy = ucl_object_tostring (http_proxy); + } + } + + server_obj = ucl_object_lookup (http_obj, "server"); + + if (server_obj) { + const ucl_object_t *kp_size; + + kp_size = ucl_object_lookup (server_obj, "cache_size"); + + if (kp_size) { + ctx->config.kp_cache_size_server = ucl_object_toint (kp_size); + } + } + } + + rspamd_http_context_init (ctx); + + return ctx; +} + + +void +rspamd_http_context_free (struct rspamd_http_context *ctx) +{ + if (ctx == default_ctx) { + default_ctx = NULL; + } + + if (ctx->client_kp_cache) { + rspamd_keypair_cache_destroy (ctx->client_kp_cache); + } + + if (ctx->server_kp_cache) { + rspamd_keypair_cache_destroy (ctx->server_kp_cache); + } + + if (ctx->config.client_key_rotate_time > 0) { + 
ev_timer_stop (ctx->event_loop, &ctx->client_rotate_ev); + + if (ctx->client_kp) { + rspamd_keypair_unref (ctx->client_kp); + } + } + + struct rspamd_keepalive_hash_key *hk; + + kh_foreach_key (ctx->keep_alive_hash, hk, { + msg_debug_http_context ("cleanup keepalive elt %s (%s)", + rspamd_inet_address_to_string_pretty (hk->addr), + hk->host); + + if (hk->host) { + g_free (hk->host); + } + + rspamd_inet_address_free (hk->addr); + rspamd_http_keepalive_queue_cleanup (&hk->conns); + g_free (hk); + }); + + kh_destroy (rspamd_keep_alive_hash, ctx->keep_alive_hash); + + if (ctx->http_proxies) { + rspamd_upstreams_destroy (ctx->http_proxies); + } + + g_free (ctx); +} + +struct rspamd_http_context* +rspamd_http_context_create_config (struct rspamd_http_context_cfg *cfg, + struct ev_loop *ev_base, + struct upstream_ctx *ups_ctx) +{ + struct rspamd_http_context *ctx; + + ctx = rspamd_http_context_new_default (NULL, ev_base, ups_ctx); + memcpy (&ctx->config, cfg, sizeof (*cfg)); + rspamd_http_context_init (ctx); + + return ctx; +} + +struct rspamd_http_context* +rspamd_http_context_default (void) +{ + g_assert (default_ctx != NULL); + + return default_ctx; +} + +gint32 +rspamd_keep_alive_key_hash (struct rspamd_keepalive_hash_key *k) +{ + gint32 h; + + h = rspamd_inet_address_port_hash (k->addr); + + if (k->host) { + h = rspamd_cryptobox_fast_hash (k->host, strlen (k->host), h); + } + + return h; +} + +bool +rspamd_keep_alive_key_equal (struct rspamd_keepalive_hash_key *k1, + struct rspamd_keepalive_hash_key *k2) +{ + if (k1->host && k2->host) { + if (rspamd_inet_address_port_equal (k1->addr, k2->addr)) { + return strcmp (k1->host, k2->host) == 0; + } + } + else if (!k1->host && !k2->host) { + return rspamd_inet_address_port_equal (k1->addr, k2->addr); + } + + /* One has host and another has no host */ + return false; +} + +struct rspamd_http_connection* +rspamd_http_context_check_keepalive (struct rspamd_http_context *ctx, + const rspamd_inet_addr_t *addr, + const gchar 
*host) +{ + struct rspamd_keepalive_hash_key hk, *phk; + khiter_t k; + + hk.addr = (rspamd_inet_addr_t *)addr; + hk.host = (gchar *)host; + + k = kh_get (rspamd_keep_alive_hash, ctx->keep_alive_hash, &hk); + + if (k != kh_end (ctx->keep_alive_hash)) { + phk = kh_key (ctx->keep_alive_hash, k); + GQueue *conns = &phk->conns; + + /* Use stack based approach */ + + if (g_queue_get_length (conns) > 0) { + struct rspamd_http_keepalive_cbdata *cbd; + struct rspamd_http_connection *conn; + + cbd = g_queue_pop_head (conns); + rspamd_ev_watcher_stop (ctx->event_loop, &cbd->ev); + conn = cbd->conn; + g_free (cbd); + + msg_debug_http_context ("reused keepalive element %s (%s), %d connections queued", + rspamd_inet_address_to_string_pretty (phk->addr), + phk->host, conns->length); + + /* We transfer refcount here! */ + return conn; + } + else { + msg_debug_http_context ("found empty keepalive element %s (%s), cannot reuse", + rspamd_inet_address_to_string_pretty (phk->addr), + phk->host); + } + } + + return NULL; +} + +void +rspamd_http_context_prepare_keepalive (struct rspamd_http_context *ctx, + struct rspamd_http_connection *conn, + const rspamd_inet_addr_t *addr, + const gchar *host) +{ + struct rspamd_keepalive_hash_key hk, *phk; + khiter_t k; + + hk.addr = (rspamd_inet_addr_t *)addr; + hk.host = (gchar *)host; + + k = kh_get (rspamd_keep_alive_hash, ctx->keep_alive_hash, &hk); + + if (k != kh_end (ctx->keep_alive_hash)) { + /* Reuse existing */ + conn->keepalive_hash_key = kh_key (ctx->keep_alive_hash, k); + msg_debug_http_context ("use existing keepalive element %s (%s)", + rspamd_inet_address_to_string_pretty (conn->keepalive_hash_key->addr), + conn->keepalive_hash_key->host); + } + else { + /* Create new one */ + GQueue empty_init = G_QUEUE_INIT; + gint r; + + phk = g_malloc (sizeof (*phk)); + phk->conns = empty_init; + phk->host = g_strdup (host); + phk->addr = rspamd_inet_address_copy (addr); + + kh_put (rspamd_keep_alive_hash, ctx->keep_alive_hash, phk, &r); + 
conn->keepalive_hash_key = phk; + + msg_debug_http_context ("create new keepalive element %s (%s)", + rspamd_inet_address_to_string_pretty (conn->keepalive_hash_key->addr), + conn->keepalive_hash_key->host); + } +} + +static void +rspamd_http_keepalive_handler (gint fd, short what, gpointer ud) +{ + struct rspamd_http_keepalive_cbdata *cbdata = + (struct rspamd_http_keepalive_cbdata *)ud;/* + * We can get here if a remote side reported something or it has + * timed out. In both cases we just terminate keepalive connection. + */ + + g_queue_delete_link (cbdata->queue, cbdata->link); + msg_debug_http_context ("remove keepalive element %s (%s), %d connections left", + rspamd_inet_address_to_string_pretty (cbdata->conn->keepalive_hash_key->addr), + cbdata->conn->keepalive_hash_key->host, + cbdata->queue->length); + rspamd_http_connection_unref (cbdata->conn); + rspamd_ev_watcher_stop (cbdata->ctx->event_loop, &cbdata->ev); + g_free (cbdata); +} + +void +rspamd_http_context_push_keepalive (struct rspamd_http_context *ctx, + struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, + struct ev_loop *event_loop) +{ + struct rspamd_http_keepalive_cbdata *cbdata; + gdouble timeout = ctx->config.keepalive_interval; + + g_assert (conn->keepalive_hash_key != NULL); + + if (msg) { + const rspamd_ftok_t *tok; + rspamd_ftok_t cmp; + + tok = rspamd_http_message_find_header (msg, "Connection"); + + if (!tok) { + /* Server has not stated that it can do keep alive */ + conn->finished = TRUE; + msg_debug_http_context ("no Connection header"); + return; + } + + RSPAMD_FTOK_ASSIGN (&cmp, "keep-alive"); + + if (rspamd_ftok_casecmp (&cmp, tok) != 0) { + conn->finished = TRUE; + msg_debug_http_context ("connection header is not `keep-alive`"); + return; + } + + /* We can proceed, check timeout */ + + tok = rspamd_http_message_find_header (msg, "Keep-Alive"); + + if (tok) { + goffset pos = rspamd_substring_search_caseless (tok->begin, + tok->len, "timeout=", sizeof 
("timeout=") - 1); + + if (pos != -1) { + pos += sizeof ("timeout="); + + gchar *end_pos = memchr (tok->begin + pos, ',', tok->len - pos); + glong real_timeout; + + if (end_pos) { + if (rspamd_strtol (tok->begin + pos + 1, + (end_pos - tok->begin) - pos - 1, &real_timeout) && + real_timeout > 0) { + timeout = real_timeout; + msg_debug_http_context ("got timeout attr %.2f", timeout); + } + } + else { + if (rspamd_strtol (tok->begin + pos + 1, + tok->len - pos - 1, &real_timeout) && + real_timeout > 0) { + timeout = real_timeout; + msg_debug_http_context ("got timeout attr %.2f", timeout); + } + } + } + } + } + + /* Move connection to the keepalive pool */ + cbdata = g_malloc0 (sizeof (*cbdata)); + + cbdata->conn = rspamd_http_connection_ref (conn); + g_queue_push_tail (&conn->keepalive_hash_key->conns, cbdata); + cbdata->link = conn->keepalive_hash_key->conns.tail; + cbdata->queue = &conn->keepalive_hash_key->conns; + cbdata->ctx = ctx; + conn->finished = FALSE; + + rspamd_ev_watcher_init (&cbdata->ev, conn->fd, EV_READ, + rspamd_http_keepalive_handler, + cbdata); + rspamd_ev_watcher_start (event_loop, &cbdata->ev, timeout); + + msg_debug_http_context ("push keepalive element %s (%s), %d connections queued, %.1f timeout", + rspamd_inet_address_to_string_pretty (cbdata->conn->keepalive_hash_key->addr), + cbdata->conn->keepalive_hash_key->host, + cbdata->queue->length, + timeout); +}
\ No newline at end of file diff --git a/src/libserver/http/http_context.h b/src/libserver/http/http_context.h new file mode 100644 index 000000000..82ee400b0 --- /dev/null +++ b/src/libserver/http/http_context.h @@ -0,0 +1,110 @@ +/*- + * Copyright 2019 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSPAMD_HTTP_CONTEXT_H +#define RSPAMD_HTTP_CONTEXT_H + +#include "config.h" +#include "ucl.h" +#include "addr.h" + +#include "contrib/libev/ev.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_http_context; +struct rspamd_config; +struct rspamd_http_message; +struct upstream_ctx; + +struct rspamd_http_context_cfg { + guint kp_cache_size_client; + guint kp_cache_size_server; + guint ssl_cache_size; + gdouble keepalive_interval; + gdouble client_key_rotate_time; + const gchar *user_agent; + const gchar *http_proxy; + const gchar *server_hdr; +}; + +/** + * Creates and configures new HTTP context + * @param root_conf configuration object + * @param ev_base event base + * @return new context used for both client and server HTTP connections + */ +struct rspamd_http_context *rspamd_http_context_create (struct rspamd_config *cfg, + struct ev_loop *ev_base, + struct upstream_ctx *ctx); + +struct rspamd_http_context *rspamd_http_context_create_config ( + struct rspamd_http_context_cfg *cfg, + struct ev_loop *ev_base, + struct upstream_ctx *ctx); + +/** + * Destroys context + * @param ctx + */ +void rspamd_http_context_free 
(struct rspamd_http_context *ctx); + +struct rspamd_http_context *rspamd_http_context_default (void); + +/** + * Returns preserved keepalive connection if it's available. + * Refcount is transferred to caller! + * @param ctx + * @param addr + * @param host + * @return + */ +struct rspamd_http_connection *rspamd_http_context_check_keepalive ( + struct rspamd_http_context *ctx, const rspamd_inet_addr_t *addr, + const gchar *host); + +/** + * Prepares keepalive key for a connection by creating a new entry or by reusing existent + * Bear in mind, that keepalive pool has currently no cleanup methods! + * @param ctx + * @param conn + * @param addr + * @param host + */ +void rspamd_http_context_prepare_keepalive (struct rspamd_http_context *ctx, + struct rspamd_http_connection *conn, + const rspamd_inet_addr_t *addr, + const gchar *host); + +/** + * Pushes a connection to keepalive pool after client request is finished, + * keepalive key *must* be prepared before using of this function + * @param ctx + * @param conn + * @param msg + */ +void rspamd_http_context_push_keepalive (struct rspamd_http_context *ctx, + struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, + struct ev_loop *ev_base); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/libserver/http/http_message.c b/src/libserver/http/http_message.c new file mode 100644 index 000000000..5f9d22178 --- /dev/null +++ b/src/libserver/http/http_message.c @@ -0,0 +1,688 @@ +/*- + * Copyright 2019 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "http_message.h" +#include "http_connection.h" +#include "http_private.h" +#include "libutil/printf.h" +#include "libserver/logger.h" +#include "utlist.h" +#include "unix-std.h" + +struct rspamd_http_message * +rspamd_http_new_message (enum rspamd_http_message_type type) +{ + struct rspamd_http_message *new; + + new = g_malloc0 (sizeof (struct rspamd_http_message)); + + if (type == HTTP_REQUEST) { + new->url = rspamd_fstring_new (); + } + else { + new->url = NULL; + new->code = 200; + } + + new->port = 80; + new->type = type; + new->method = HTTP_INVALID; + new->headers = kh_init (rspamd_http_headers_hash); + + REF_INIT_RETAIN (new, rspamd_http_message_free); + + return new; +} + +struct rspamd_http_message* +rspamd_http_message_from_url (const gchar *url) +{ + struct http_parser_url pu; + struct rspamd_http_message *msg; + const gchar *host, *path; + size_t pathlen, urllen; + guint flags = 0; + + if (url == NULL) { + return NULL; + } + + urllen = strlen (url); + memset (&pu, 0, sizeof (pu)); + + if (http_parser_parse_url (url, urllen, FALSE, &pu) != 0) { + msg_warn ("cannot parse URL: %s", url); + return NULL; + } + + if ((pu.field_set & (1 << UF_HOST)) == 0) { + msg_warn ("no host argument in URL: %s", url); + return NULL; + } + + if ((pu.field_set & (1 << UF_SCHEMA))) { + if (pu.field_data[UF_SCHEMA].len == sizeof ("https") - 1 && + memcmp (url + pu.field_data[UF_SCHEMA].off, "https", 5) == 0) { + flags |= RSPAMD_HTTP_FLAG_SSL; + } + } + + if ((pu.field_set & (1 << UF_PATH)) == 0) { + path = "/"; + pathlen = 1; + } + else { + path = url + pu.field_data[UF_PATH].off; + pathlen = urllen - pu.field_data[UF_PATH].off; + } + + msg = rspamd_http_new_message (HTTP_REQUEST); + host = url + pu.field_data[UF_HOST].off; + msg->flags = flags; + + if ((pu.field_set & (1 << UF_PORT)) != 0) { + msg->port = pu.port; + } + else { + /* XXX: magic constant */ + 
if (flags & RSPAMD_HTTP_FLAG_SSL) { + msg->port = 443; + } + else { + msg->port = 80; + } + } + + msg->host = g_string_new_len (host, pu.field_data[UF_HOST].len); + msg->url = rspamd_fstring_append (msg->url, path, pathlen); + + REF_INIT_RETAIN (msg, rspamd_http_message_free); + + return msg; +} + +const gchar * +rspamd_http_message_get_body (struct rspamd_http_message *msg, + gsize *blen) +{ + const gchar *ret = NULL; + + if (msg->body_buf.len > 0) { + ret = msg->body_buf.begin; + } + + if (blen) { + *blen = msg->body_buf.len; + } + + return ret; +} + +static void +rspamd_http_shname_dtor (void *p) +{ + struct rspamd_storage_shmem *n = p; + +#ifdef HAVE_SANE_SHMEM + shm_unlink (n->shm_name); +#else + unlink (n->shm_name); +#endif + g_free (n->shm_name); + g_free (n); +} + +struct rspamd_storage_shmem * +rspamd_http_message_shmem_ref (struct rspamd_http_message *msg) +{ + if ((msg->flags & RSPAMD_HTTP_FLAG_SHMEM) && msg->body_buf.c.shared.name) { + REF_RETAIN (msg->body_buf.c.shared.name); + return msg->body_buf.c.shared.name; + } + + return NULL; +} + +guint +rspamd_http_message_get_flags (struct rspamd_http_message *msg) +{ + return msg->flags; +} + +void +rspamd_http_message_shmem_unref (struct rspamd_storage_shmem *p) +{ + REF_RELEASE (p); +} + +gboolean +rspamd_http_message_set_body (struct rspamd_http_message *msg, + const gchar *data, gsize len) +{ + union _rspamd_storage_u *storage; + storage = &msg->body_buf.c; + + rspamd_http_message_storage_cleanup (msg); + + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + storage->shared.name = g_malloc (sizeof (*storage->shared.name)); + REF_INIT_RETAIN (storage->shared.name, rspamd_http_shname_dtor); +#ifdef HAVE_SANE_SHMEM + #if defined(__DragonFly__) + // DragonFly uses regular files for shm. User rspamd is not allowed to create + // files in the root. 
+ storage->shared.name->shm_name = g_strdup ("/tmp/rhm.XXXXXXXXXXXXXXXXXXXX"); +#else + storage->shared.name->shm_name = g_strdup ("/rhm.XXXXXXXXXXXXXXXXXXXX"); +#endif + storage->shared.shm_fd = rspamd_shmem_mkstemp (storage->shared.name->shm_name); +#else + /* XXX: assume that tempdir is /tmp */ + storage->shared.name->shm_name = g_strdup ("/tmp/rhm.XXXXXXXXXXXXXXXXXXXX"); + storage->shared.shm_fd = mkstemp (storage->shared.name->shm_name); +#endif + + if (storage->shared.shm_fd == -1) { + return FALSE; + } + + if (len != 0 && len != ULLONG_MAX) { + if (ftruncate (storage->shared.shm_fd, len) == -1) { + return FALSE; + } + + msg->body_buf.str = mmap (NULL, len, + PROT_WRITE|PROT_READ, MAP_SHARED, + storage->shared.shm_fd, 0); + + if (msg->body_buf.str == MAP_FAILED) { + return FALSE; + } + + msg->body_buf.begin = msg->body_buf.str; + msg->body_buf.allocated_len = len; + + if (data != NULL) { + memcpy (msg->body_buf.str, data, len); + msg->body_buf.len = len; + } + } + else { + msg->body_buf.len = 0; + msg->body_buf.begin = NULL; + msg->body_buf.str = NULL; + msg->body_buf.allocated_len = 0; + } + } + else { + if (len != 0 && len != ULLONG_MAX) { + if (data == NULL) { + storage->normal = rspamd_fstring_sized_new (len); + msg->body_buf.len = 0; + } + else { + storage->normal = rspamd_fstring_new_init (data, len); + msg->body_buf.len = len; + } + } + else { + storage->normal = rspamd_fstring_new (); + } + + msg->body_buf.begin = storage->normal->str; + msg->body_buf.str = storage->normal->str; + msg->body_buf.allocated_len = storage->normal->allocated; + } + + msg->flags |= RSPAMD_HTTP_FLAG_HAS_BODY; + + return TRUE; +} + +void +rspamd_http_message_set_method (struct rspamd_http_message *msg, + const gchar *method) +{ + gint i; + + /* Linear search: not very efficient method */ + for (i = 0; i < HTTP_METHOD_MAX; i ++) { + if (g_ascii_strcasecmp (method, http_method_str (i)) == 0) { + msg->method = i; + } + } +} + +gboolean +rspamd_http_message_set_body_from_fd 
(struct rspamd_http_message *msg, + gint fd) +{ + union _rspamd_storage_u *storage; + struct stat st; + + rspamd_http_message_storage_cleanup (msg); + + storage = &msg->body_buf.c; + msg->flags |= RSPAMD_HTTP_FLAG_SHMEM|RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE; + + storage->shared.shm_fd = dup (fd); + msg->body_buf.str = MAP_FAILED; + + if (storage->shared.shm_fd == -1) { + return FALSE; + } + + if (fstat (storage->shared.shm_fd, &st) == -1) { + return FALSE; + } + + msg->body_buf.str = mmap (NULL, st.st_size, + PROT_READ, MAP_SHARED, + storage->shared.shm_fd, 0); + + if (msg->body_buf.str == MAP_FAILED) { + return FALSE; + } + + msg->body_buf.begin = msg->body_buf.str; + msg->body_buf.len = st.st_size; + msg->body_buf.allocated_len = st.st_size; + + return TRUE; +} + +gboolean +rspamd_http_message_set_body_from_fstring_steal (struct rspamd_http_message *msg, + rspamd_fstring_t *fstr) +{ + union _rspamd_storage_u *storage; + + rspamd_http_message_storage_cleanup (msg); + + storage = &msg->body_buf.c; + msg->flags &= ~(RSPAMD_HTTP_FLAG_SHMEM|RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE); + + storage->normal = fstr; + msg->body_buf.str = fstr->str; + msg->body_buf.begin = msg->body_buf.str; + msg->body_buf.len = fstr->len; + msg->body_buf.allocated_len = fstr->allocated; + + return TRUE; +} + +gboolean +rspamd_http_message_set_body_from_fstring_copy (struct rspamd_http_message *msg, + const rspamd_fstring_t *fstr) +{ + union _rspamd_storage_u *storage; + + rspamd_http_message_storage_cleanup (msg); + + storage = &msg->body_buf.c; + msg->flags &= ~(RSPAMD_HTTP_FLAG_SHMEM|RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE); + + storage->normal = rspamd_fstring_new_init (fstr->str, fstr->len); + msg->body_buf.str = storage->normal->str; + msg->body_buf.begin = msg->body_buf.str; + msg->body_buf.len = storage->normal->len; + msg->body_buf.allocated_len = storage->normal->allocated; + + return TRUE; +} + + +gboolean +rspamd_http_message_grow_body (struct rspamd_http_message *msg, gsize len) +{ + struct stat 
st; + union _rspamd_storage_u *storage; + gsize newlen; + + storage = &msg->body_buf.c; + + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + if (storage->shared.shm_fd == -1) { + return FALSE; + } + + if (fstat (storage->shared.shm_fd, &st) == -1) { + return FALSE; + } + + /* Check if we need to grow */ + if ((gsize)st.st_size < msg->body_buf.len + len) { + /* Need to grow */ + newlen = rspamd_fstring_suggest_size (msg->body_buf.len, st.st_size, + len); + /* Unmap as we need another size of segment */ + if (msg->body_buf.str != MAP_FAILED) { + munmap (msg->body_buf.str, st.st_size); + } + + if (ftruncate (storage->shared.shm_fd, newlen) == -1) { + return FALSE; + } + + msg->body_buf.str = mmap (NULL, newlen, + PROT_WRITE|PROT_READ, MAP_SHARED, + storage->shared.shm_fd, 0); + if (msg->body_buf.str == MAP_FAILED) { + return FALSE; + } + + msg->body_buf.begin = msg->body_buf.str; + msg->body_buf.allocated_len = newlen; + } + } + else { + storage->normal = rspamd_fstring_grow (storage->normal, len); + + /* Append might cause realloc */ + msg->body_buf.begin = storage->normal->str; + msg->body_buf.len = storage->normal->len; + msg->body_buf.str = storage->normal->str; + msg->body_buf.allocated_len = storage->normal->allocated; + } + + return TRUE; +} + +gboolean +rspamd_http_message_append_body (struct rspamd_http_message *msg, + const gchar *data, gsize len) +{ + union _rspamd_storage_u *storage; + + storage = &msg->body_buf.c; + + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + if (!rspamd_http_message_grow_body (msg, len)) { + return FALSE; + } + + memcpy (msg->body_buf.str + msg->body_buf.len, data, len); + msg->body_buf.len += len; + } + else { + storage->normal = rspamd_fstring_append (storage->normal, data, len); + + /* Append might cause realloc */ + msg->body_buf.begin = storage->normal->str; + msg->body_buf.len = storage->normal->len; + msg->body_buf.str = storage->normal->str; + msg->body_buf.allocated_len = storage->normal->allocated; + } + + return TRUE; +} + 
+void +rspamd_http_message_storage_cleanup (struct rspamd_http_message *msg) +{ + union _rspamd_storage_u *storage; + struct stat st; + + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + storage = &msg->body_buf.c; + + if (storage->shared.shm_fd > 0) { + g_assert (fstat (storage->shared.shm_fd, &st) != -1); + + if (msg->body_buf.str != MAP_FAILED) { + munmap (msg->body_buf.str, st.st_size); + } + + close (storage->shared.shm_fd); + } + + if (storage->shared.name != NULL) { + REF_RELEASE (storage->shared.name); + } + + storage->shared.shm_fd = -1; + msg->body_buf.str = MAP_FAILED; + } + else { + if (msg->body_buf.c.normal) { + rspamd_fstring_free (msg->body_buf.c.normal); + } + + msg->body_buf.c.normal = NULL; + } + + msg->body_buf.len = 0; +} + +void +rspamd_http_message_free (struct rspamd_http_message *msg) +{ + struct rspamd_http_header *hdr, *hcur, *hcurtmp; + + kh_foreach_value (msg->headers, hdr, { + DL_FOREACH_SAFE (hdr, hcur, hcurtmp) { + rspamd_fstring_free (hcur->combined); + g_free (hcur); + } + }); + + kh_destroy (rspamd_http_headers_hash, msg->headers); + rspamd_http_message_storage_cleanup (msg); + + if (msg->url != NULL) { + rspamd_fstring_free (msg->url); + } + if (msg->status != NULL) { + rspamd_fstring_free (msg->status); + } + if (msg->host != NULL) { + g_string_free (msg->host, TRUE); + } + if (msg->peer_key != NULL) { + rspamd_pubkey_unref (msg->peer_key); + } + + g_free (msg); +} + +void +rspamd_http_message_set_peer_key (struct rspamd_http_message *msg, + struct rspamd_cryptobox_pubkey *pk) +{ + if (msg->peer_key != NULL) { + rspamd_pubkey_unref (msg->peer_key); + } + + if (pk) { + msg->peer_key = rspamd_pubkey_ref (pk); + } + else { + msg->peer_key = NULL; + } +} + +void +rspamd_http_message_add_header_len (struct rspamd_http_message *msg, + const gchar *name, + const gchar *value, + gsize len) +{ + struct rspamd_http_header *hdr, *found; + guint nlen, vlen; + khiter_t k; + gint r; + + if (msg != NULL && name != NULL && value != NULL) { + hdr = 
g_malloc0 (sizeof (struct rspamd_http_header)); + nlen = strlen (name); + vlen = len; + hdr->combined = rspamd_fstring_sized_new (nlen + vlen + 4); + rspamd_printf_fstring (&hdr->combined, "%s: %*s\r\n", name, (gint)vlen, + value); + hdr->name.begin = hdr->combined->str; + hdr->name.len = nlen; + hdr->value.begin = hdr->combined->str + nlen + 2; + hdr->value.len = vlen; + + k = kh_put (rspamd_http_headers_hash, msg->headers, &hdr->name, + &r); + + if (r != 0) { + kh_value (msg->headers, k) = hdr; + found = NULL; + } + else { + found = kh_value (msg->headers, k); + } + + DL_APPEND (found, hdr); + } +} + +void +rspamd_http_message_add_header (struct rspamd_http_message *msg, + const gchar *name, + const gchar *value) +{ + if (value) { + rspamd_http_message_add_header_len (msg, name, value, strlen (value)); + } +} + +void +rspamd_http_message_add_header_fstr (struct rspamd_http_message *msg, + const gchar *name, + rspamd_fstring_t *value) +{ + struct rspamd_http_header *hdr, *found = NULL; + guint nlen, vlen; + khiter_t k; + gint r; + + if (msg != NULL && name != NULL && value != NULL) { + hdr = g_malloc0 (sizeof (struct rspamd_http_header)); + nlen = strlen (name); + vlen = value->len; + hdr->combined = rspamd_fstring_sized_new (nlen + vlen + 4); + rspamd_printf_fstring (&hdr->combined, "%s: %V\r\n", name, value); + hdr->name.begin = hdr->combined->str; + hdr->name.len = nlen; + hdr->value.begin = hdr->combined->str + nlen + 2; + hdr->value.len = vlen; + + k = kh_put (rspamd_http_headers_hash, msg->headers, &hdr->name, + &r); + + if (r != 0) { + kh_value (msg->headers, k) = hdr; + found = NULL; + } + else { + found = kh_value (msg->headers, k); + } + + DL_APPEND (found, hdr); + } +} + +const rspamd_ftok_t * +rspamd_http_message_find_header (struct rspamd_http_message *msg, + const gchar *name) +{ + const rspamd_ftok_t *res = NULL; + rspamd_ftok_t srch; + guint slen = strlen (name); + khiter_t k; + + if (msg != NULL) { + srch.begin = name; + srch.len = slen; + + k = 
kh_get (rspamd_http_headers_hash, msg->headers, &srch); + + if (k != kh_end (msg->headers)) { + res = &(kh_value (msg->headers, k)->value); + } + } + + return res; +} + +GPtrArray* +rspamd_http_message_find_header_multiple ( + struct rspamd_http_message *msg, + const gchar *name) +{ + GPtrArray *res = NULL; + struct rspamd_http_header *hdr, *cur; + rspamd_ftok_t srch; + khiter_t k; + guint cnt = 0; + + guint slen = strlen (name); + + if (msg != NULL) { + srch.begin = name; + srch.len = slen; + + k = kh_get (rspamd_http_headers_hash, msg->headers, &srch); + + if (k != kh_end (msg->headers)) { + hdr = kh_value (msg->headers, k); + + LL_COUNT (hdr, cur, cnt); + res = g_ptr_array_sized_new (cnt); + + LL_FOREACH (hdr, cur) { + g_ptr_array_add (res, &cur->value); + } + } + } + + + return res; +} + + +gboolean +rspamd_http_message_remove_header (struct rspamd_http_message *msg, + const gchar *name) +{ + struct rspamd_http_header *hdr, *hcur, *hcurtmp; + gboolean res = FALSE; + guint slen = strlen (name); + rspamd_ftok_t srch; + khiter_t k; + + if (msg != NULL) { + srch.begin = name; + srch.len = slen; + + k = kh_get (rspamd_http_headers_hash, msg->headers, &srch); + + if (k != kh_end (msg->headers)) { + hdr = kh_value (msg->headers, k); + kh_del (rspamd_http_headers_hash, msg->headers, k); + res = TRUE; + + DL_FOREACH_SAFE (hdr, hcur, hcurtmp) { + rspamd_fstring_free (hcur->combined); + g_free (hcur); + } + } + } + + return res; +}
\ No newline at end of file diff --git a/src/libserver/http/http_message.h b/src/libserver/http/http_message.h new file mode 100644 index 000000000..e13c7427c --- /dev/null +++ b/src/libserver/http/http_message.h @@ -0,0 +1,236 @@ +/*- + * Copyright 2019 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef RSPAMD_HTTP_MESSAGE_H +#define RSPAMD_HTTP_MESSAGE_H + +#include "config.h" +#include "keypair.h" +#include "keypairs_cache.h" +#include "fstring.h" +#include "ref.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_http_connection; + +enum rspamd_http_message_type { + HTTP_REQUEST = 0, HTTP_RESPONSE +}; + +/** + * Extract the current message from a connection to deal with separately + * @param conn + * @return + */ +struct rspamd_http_message *rspamd_http_connection_steal_msg ( + struct rspamd_http_connection *conn); + +/** + * Copy the current message from a connection to deal with separately + * @param conn + * @return + */ +struct rspamd_http_message *rspamd_http_connection_copy_msg ( + struct rspamd_http_message *msg, GError **err); + +/** + * Create new HTTP message + * @param type request or response + * @return new http message + */ +struct rspamd_http_message *rspamd_http_new_message (enum rspamd_http_message_type type); + +/** + * Increase refcount number for an HTTP message + * @param msg message to use + * @return + */ +struct rspamd_http_message *rspamd_http_message_ref (struct rspamd_http_message 
*msg); + +/** + * Decrease number of refcounts for http message + * @param msg + */ +void rspamd_http_message_unref (struct rspamd_http_message *msg); + +/** + * Sets a key for peer + * @param msg + * @param pk + */ +void rspamd_http_message_set_peer_key (struct rspamd_http_message *msg, + struct rspamd_cryptobox_pubkey *pk); + +/** + * Create HTTP message from URL + * @param url + * @return new message or NULL + */ +struct rspamd_http_message *rspamd_http_message_from_url (const gchar *url); + +/** + * Returns body for a message + * @param msg + * @param blen pointer where to save body length + * @return pointer to body start + */ +const gchar *rspamd_http_message_get_body (struct rspamd_http_message *msg, + gsize *blen); + +/** + * Set message's body from the string + * @param msg + * @param data + * @param len + * @return TRUE if a message's body has been set + */ +gboolean rspamd_http_message_set_body (struct rspamd_http_message *msg, + const gchar *data, gsize len); + +/** + * Set message's method by name + * @param msg + * @param method + */ +void rspamd_http_message_set_method (struct rspamd_http_message *msg, + const gchar *method); + +/** + * Maps fd as message's body + * @param msg + * @param fd + * @return TRUE if a message's body has been set + */ +gboolean rspamd_http_message_set_body_from_fd (struct rspamd_http_message *msg, + gint fd); + +/** + * Uses rspamd_fstring_t as message's body, string is consumed by this operation + * @param msg + * @param fstr + * @return TRUE if a message's body has been set + */ +gboolean rspamd_http_message_set_body_from_fstring_steal (struct rspamd_http_message *msg, + rspamd_fstring_t *fstr); + +/** + * Uses rspamd_fstring_t as message's body, string is copied by this operation + * @param msg + * @param fstr + * @return TRUE if a message's body has been set + */ +gboolean rspamd_http_message_set_body_from_fstring_copy (struct rspamd_http_message *msg, + const rspamd_fstring_t *fstr); + +/** + * Appends data to 
message's body + * @param msg + * @param data + * @param len + * @return TRUE if a message's body has been set + */ +gboolean rspamd_http_message_append_body (struct rspamd_http_message *msg, + const gchar *data, gsize len); + +/** + * Append a header to http message + * @param rep + * @param name + * @param value + */ +void rspamd_http_message_add_header (struct rspamd_http_message *msg, + const gchar *name, + const gchar *value); + +void rspamd_http_message_add_header_len (struct rspamd_http_message *msg, + const gchar *name, + const gchar *value, + gsize len); + +void rspamd_http_message_add_header_fstr (struct rspamd_http_message *msg, + const gchar *name, + rspamd_fstring_t *value); + +/** + * Search for a specified header in message + * @param msg message + * @param name name of header + */ +const rspamd_ftok_t *rspamd_http_message_find_header ( + struct rspamd_http_message *msg, + const gchar *name); + +/** + * Search for a header that has multiple values + * @param msg + * @param name + * @return list of rspamd_ftok_t * with values + */ +GPtrArray *rspamd_http_message_find_header_multiple ( + struct rspamd_http_message *msg, + const gchar *name); + +/** + * Remove specific header from a message + * @param msg + * @param name + * @return + */ +gboolean rspamd_http_message_remove_header (struct rspamd_http_message *msg, + const gchar *name); + +/** + * Free HTTP message + * @param msg + */ +void rspamd_http_message_free (struct rspamd_http_message *msg); + +/** + * Extract arguments from a message's URI contained inside query string decoding + * them if needed + * @param msg HTTP request message + * @return new GHashTable which maps rspamd_ftok_t* to rspamd_ftok_t* + * (table must be freed by a caller) + */ +GHashTable *rspamd_http_message_parse_query (struct rspamd_http_message *msg); + +/** + * Increase refcount for shared file (if any) to prevent early memory unlinking + * @param msg + */ +struct rspamd_storage_shmem *rspamd_http_message_shmem_ref (struct 
rspamd_http_message *msg); + +/** + * Decrease external ref for shmem segment associated with a message + * @param msg + */ +void rspamd_http_message_shmem_unref (struct rspamd_storage_shmem *p); + +/** + * Returns message's flags + * @param msg + * @return + */ +guint rspamd_http_message_get_flags (struct rspamd_http_message *msg); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/libserver/http/http_private.h b/src/libserver/http/http_private.h new file mode 100644 index 000000000..f2270277b --- /dev/null +++ b/src/libserver/http/http_private.h @@ -0,0 +1,127 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef SRC_LIBUTIL_HTTP_PRIVATE_H_ +#define SRC_LIBUTIL_HTTP_PRIVATE_H_ + +#include "http_connection.h" +#include "http_parser.h" +#include "str_util.h" +#include "keypair.h" +#include "keypairs_cache.h" +#include "ref.h" +#include "upstream.h" +#include "khash.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * HTTP header structure + */ +struct rspamd_http_header { + rspamd_fstring_t *combined; + rspamd_ftok_t name; + rspamd_ftok_t value; + struct rspamd_http_header *prev, *next; +}; + +KHASH_INIT (rspamd_http_headers_hash, rspamd_ftok_t *, + struct rspamd_http_header *, 1, + rspamd_ftok_icase_hash, rspamd_ftok_icase_equal); + +/** + * HTTP message structure, used for requests and replies + */ +struct rspamd_http_message { + rspamd_fstring_t *url; + GString *host; + rspamd_fstring_t *status; + khash_t (rspamd_http_headers_hash) *headers; + + struct _rspamd_body_buf_s { + /* Data start */ + const gchar *begin; + /* Data len */ + gsize len; + /* Allocated len */ + gsize allocated_len; + /* Data buffer (used to write data inside) */ + gchar *str; + + /* Internal storage */ + union _rspamd_storage_u { + rspamd_fstring_t *normal; + struct _rspamd_storage_shared_s { + struct rspamd_storage_shmem *name; + gint shm_fd; + } shared; + } c; + } body_buf; + + struct rspamd_cryptobox_pubkey *peer_key; + time_t date; + time_t last_modified; + unsigned port; + int type; + gint code; + enum http_method method; + gint flags; + ref_entry_t ref; +}; + +struct rspamd_keepalive_hash_key { + rspamd_inet_addr_t *addr; + gchar *host; + GQueue conns; +}; + +gint32 rspamd_keep_alive_key_hash (struct rspamd_keepalive_hash_key *k); + +bool rspamd_keep_alive_key_equal (struct rspamd_keepalive_hash_key *k1, + struct rspamd_keepalive_hash_key *k2); + +KHASH_INIT (rspamd_keep_alive_hash, struct rspamd_keepalive_hash_key *, + char, 0, rspamd_keep_alive_key_hash, rspamd_keep_alive_key_equal); + +struct rspamd_http_context { + struct rspamd_http_context_cfg config; + struct 
rspamd_keypair_cache *client_kp_cache; + struct rspamd_cryptobox_keypair *client_kp; + struct rspamd_keypair_cache *server_kp_cache; + struct upstream_ctx *ups_ctx; + struct upstream_list *http_proxies; + gpointer ssl_ctx; + gpointer ssl_ctx_noverify; + struct ev_loop *event_loop; + ev_timer client_rotate_ev; + khash_t (rspamd_keep_alive_hash) *keep_alive_hash; +}; + +#define HTTP_ERROR http_error_quark () + +GQuark http_error_quark (void); + +void rspamd_http_message_storage_cleanup (struct rspamd_http_message *msg); + +gboolean rspamd_http_message_grow_body (struct rspamd_http_message *msg, + gsize len); + +#ifdef __cplusplus +} +#endif + +#endif /* SRC_LIBUTIL_HTTP_PRIVATE_H_ */ diff --git a/src/libserver/http/http_router.c b/src/libserver/http/http_router.c new file mode 100644 index 000000000..01d47b612 --- /dev/null +++ b/src/libserver/http/http_router.c @@ -0,0 +1,546 @@ +/*- + * Copyright 2019 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "http_router.h" +#include "http_connection.h" +#include "http_private.h" +#include "libutil/regexp.h" +#include "libutil/printf.h" +#include "libserver/logger.h" +#include "utlist.h" +#include "unix-std.h" + +enum http_magic_type { + HTTP_MAGIC_PLAIN = 0, + HTTP_MAGIC_HTML, + HTTP_MAGIC_CSS, + HTTP_MAGIC_JS, + HTTP_MAGIC_PNG, + HTTP_MAGIC_JPG +}; + +static const struct _rspamd_http_magic { + const gchar *ext; + const gchar *ct; +} http_file_types[] = { + [HTTP_MAGIC_PLAIN] = { "txt", "text/plain" }, + [HTTP_MAGIC_HTML] = { "html", "text/html" }, + [HTTP_MAGIC_CSS] = { "css", "text/css" }, + [HTTP_MAGIC_JS] = { "js", "application/javascript" }, + [HTTP_MAGIC_PNG] = { "png", "image/png" }, + [HTTP_MAGIC_JPG] = { "jpg", "image/jpeg" }, +}; + +/* + * HTTP router functions + */ + +static void +rspamd_http_entry_free (struct rspamd_http_connection_entry *entry) +{ + if (entry != NULL) { + close (entry->conn->fd); + rspamd_http_connection_unref (entry->conn); + if (entry->rt->finish_handler) { + entry->rt->finish_handler (entry); + } + + DL_DELETE (entry->rt->conns, entry); + g_free (entry); + } +} + +static void +rspamd_http_router_error_handler (struct rspamd_http_connection *conn, + GError *err) +{ + struct rspamd_http_connection_entry *entry = conn->ud; + struct rspamd_http_message *msg; + + if (entry->is_reply) { + /* At this point we need to finish this session and close owned socket */ + if (entry->rt->error_handler != NULL) { + entry->rt->error_handler (entry, err); + } + rspamd_http_entry_free (entry); + } + else { + /* Here we can write a reply to a client */ + if (entry->rt->error_handler != NULL) { + entry->rt->error_handler (entry, err); + } + msg = rspamd_http_new_message (HTTP_RESPONSE); + msg->date = time (NULL); + msg->code = err->code; + rspamd_http_message_set_body (msg, err->message, strlen (err->message)); + rspamd_http_connection_reset (entry->conn); + rspamd_http_connection_write_message (entry->conn, + msg, + NULL, + "text/plain", 
+ entry, + entry->rt->timeout); + entry->is_reply = TRUE; + } +} + +static const gchar * +rspamd_http_router_detect_ct (const gchar *path) +{ + const gchar *dot; + guint i; + + dot = strrchr (path, '.'); + if (dot == NULL) { + return http_file_types[HTTP_MAGIC_PLAIN].ct; + } + dot++; + + for (i = 0; i < G_N_ELEMENTS (http_file_types); i++) { + if (strcmp (http_file_types[i].ext, dot) == 0) { + return http_file_types[i].ct; + } + } + + return http_file_types[HTTP_MAGIC_PLAIN].ct; +} + +static gboolean +rspamd_http_router_is_subdir (const gchar *parent, const gchar *sub) +{ + if (parent == NULL || sub == NULL || *parent == '\0') { + return FALSE; + } + + while (*parent != '\0') { + if (*sub != *parent) { + return FALSE; + } + parent++; + sub++; + } + + parent--; + if (*parent == G_DIR_SEPARATOR) { + return TRUE; + } + + return (*sub == G_DIR_SEPARATOR || *sub == '\0'); +} + +static gboolean +rspamd_http_router_try_file (struct rspamd_http_connection_entry *entry, + rspamd_ftok_t *lookup, gboolean expand_path) +{ + struct stat st; + gint fd; + gchar filebuf[PATH_MAX], realbuf[PATH_MAX], *dir; + struct rspamd_http_message *reply_msg; + + rspamd_snprintf (filebuf, sizeof (filebuf), "%s%c%T", + entry->rt->default_fs_path, G_DIR_SEPARATOR, lookup); + + if (realpath (filebuf, realbuf) == NULL || + lstat (realbuf, &st) == -1) { + return FALSE; + } + + if (S_ISDIR (st.st_mode) && expand_path) { + /* Try to append 'index.html' to the url */ + rspamd_fstring_t *nlookup; + rspamd_ftok_t tok; + gboolean ret; + + nlookup = rspamd_fstring_sized_new (lookup->len + sizeof ("index.html")); + rspamd_printf_fstring (&nlookup, "%T%c%s", lookup, G_DIR_SEPARATOR, + "index.html"); + tok.begin = nlookup->str; + tok.len = nlookup->len; + ret = rspamd_http_router_try_file (entry, &tok, FALSE); + rspamd_fstring_free (nlookup); + + return ret; + } + else if (!S_ISREG (st.st_mode)) { + return FALSE; + } + + /* We also need to ensure that file is inside the defined dir */ + rspamd_strlcpy 
(filebuf, realbuf, sizeof (filebuf)); + dir = dirname (filebuf); + + if (dir == NULL || + !rspamd_http_router_is_subdir (entry->rt->default_fs_path, + dir)) { + return FALSE; + } + + fd = open (realbuf, O_RDONLY); + if (fd == -1) { + return FALSE; + } + + reply_msg = rspamd_http_new_message (HTTP_RESPONSE); + reply_msg->date = time (NULL); + reply_msg->code = 200; + rspamd_http_router_insert_headers (entry->rt, reply_msg); + + if (!rspamd_http_message_set_body_from_fd (reply_msg, fd)) { + close (fd); + return FALSE; + } + + close (fd); + + rspamd_http_connection_reset (entry->conn); + + msg_debug ("requested file %s", realbuf); + rspamd_http_connection_write_message (entry->conn, reply_msg, NULL, + rspamd_http_router_detect_ct (realbuf), entry, + entry->rt->timeout); + + return TRUE; +} + +static void +rspamd_http_router_send_error (GError *err, + struct rspamd_http_connection_entry *entry) +{ + struct rspamd_http_message *err_msg; + + err_msg = rspamd_http_new_message (HTTP_RESPONSE); + err_msg->date = time (NULL); + err_msg->code = err->code; + rspamd_http_message_set_body (err_msg, err->message, + strlen (err->message)); + entry->is_reply = TRUE; + err_msg->status = rspamd_fstring_new_init (err->message, strlen (err->message)); + rspamd_http_router_insert_headers (entry->rt, err_msg); + rspamd_http_connection_reset (entry->conn); + rspamd_http_connection_write_message (entry->conn, + err_msg, + NULL, + "text/plain", + entry, + entry->rt->timeout); +} + + +static int +rspamd_http_router_finish_handler (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg) +{ + struct rspamd_http_connection_entry *entry = conn->ud; + rspamd_http_router_handler_t handler = NULL; + gpointer found; + + GError *err; + rspamd_ftok_t lookup; + const rspamd_ftok_t *encoding; + struct http_parser_url u; + guint i; + rspamd_regexp_t *re; + struct rspamd_http_connection_router *router; + + G_STATIC_ASSERT (sizeof (rspamd_http_router_handler_t) == + sizeof (gpointer)); + + 
memset (&lookup, 0, sizeof (lookup)); + router = entry->rt; + + if (entry->is_reply) { + /* Request is finished, it is safe to free a connection */ + rspamd_http_entry_free (entry); + } + else { + if (G_UNLIKELY (msg->method != HTTP_GET && msg->method != HTTP_POST)) { + if (router->unknown_method_handler) { + return router->unknown_method_handler (entry, msg); + } + else { + err = g_error_new (HTTP_ERROR, 500, + "Invalid method"); + if (entry->rt->error_handler != NULL) { + entry->rt->error_handler (entry, err); + } + + rspamd_http_router_send_error (err, entry); + g_error_free (err); + + return 0; + } + } + + /* Search for path */ + if (msg->url != NULL && msg->url->len != 0) { + + http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u); + + if (u.field_set & (1 << UF_PATH)) { + guint unnorm_len; + lookup.begin = msg->url->str + u.field_data[UF_PATH].off; + lookup.len = u.field_data[UF_PATH].len; + + rspamd_http_normalize_path_inplace ((gchar *)lookup.begin, + lookup.len, + &unnorm_len); + lookup.len = unnorm_len; + } + else { + lookup.begin = msg->url->str; + lookup.len = msg->url->len; + } + + found = g_hash_table_lookup (entry->rt->paths, &lookup); + memcpy (&handler, &found, sizeof (found)); + msg_debug ("requested known path: %T", &lookup); + } + else { + err = g_error_new (HTTP_ERROR, 404, + "Empty path requested"); + if (entry->rt->error_handler != NULL) { + entry->rt->error_handler (entry, err); + } + + rspamd_http_router_send_error (err, entry); + g_error_free (err); + + return 0; + } + + entry->is_reply = TRUE; + + encoding = rspamd_http_message_find_header (msg, "Accept-Encoding"); + + if (encoding && rspamd_substring_search (encoding->begin, encoding->len, + "gzip", 4) != -1) { + entry->support_gzip = TRUE; + } + + if (handler != NULL) { + return handler (entry, msg); + } + else { + /* Try regexps */ + for (i = 0; i < router->regexps->len; i ++) { + re = g_ptr_array_index (router->regexps, i); + if (rspamd_regexp_match (re, lookup.begin, 
lookup.len, + TRUE)) { + found = rspamd_regexp_get_ud (re); + memcpy (&handler, &found, sizeof (found)); + + return handler (entry, msg); + } + } + + /* Now try plain file */ + if (entry->rt->default_fs_path == NULL || lookup.len == 0 || + !rspamd_http_router_try_file (entry, &lookup, TRUE)) { + + err = g_error_new (HTTP_ERROR, 404, + "Not found"); + if (entry->rt->error_handler != NULL) { + entry->rt->error_handler (entry, err); + } + + msg_info ("path: %T not found", &lookup); + rspamd_http_router_send_error (err, entry); + g_error_free (err); + } + } + } + + return 0; +} + +struct rspamd_http_connection_router * +rspamd_http_router_new (rspamd_http_router_error_handler_t eh, + rspamd_http_router_finish_handler_t fh, + ev_tstamp timeout, + const char *default_fs_path, + struct rspamd_http_context *ctx) +{ + struct rspamd_http_connection_router *nrouter; + struct stat st; + + nrouter = g_malloc0 (sizeof (struct rspamd_http_connection_router)); + nrouter->paths = g_hash_table_new_full (rspamd_ftok_icase_hash, + rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free, NULL); + nrouter->regexps = g_ptr_array_new (); + nrouter->conns = NULL; + nrouter->error_handler = eh; + nrouter->finish_handler = fh; + nrouter->response_headers = g_hash_table_new_full (rspamd_strcase_hash, + rspamd_strcase_equal, g_free, g_free); + nrouter->event_loop = ctx->event_loop; + nrouter->timeout = timeout; + nrouter->default_fs_path = NULL; + + if (default_fs_path != NULL) { + if (stat (default_fs_path, &st) == -1) { + msg_err ("cannot stat %s", default_fs_path); + } + else { + if (!S_ISDIR (st.st_mode)) { + msg_err ("path %s is not a directory", default_fs_path); + } + else { + nrouter->default_fs_path = realpath (default_fs_path, NULL); + } + } + } + + nrouter->ctx = ctx; + + return nrouter; +} + +void +rspamd_http_router_set_key (struct rspamd_http_connection_router *router, + struct rspamd_cryptobox_keypair *key) +{ + g_assert (key != NULL); + + router->key = rspamd_keypair_ref 
(key); +} + +void +rspamd_http_router_add_path (struct rspamd_http_connection_router *router, + const gchar *path, rspamd_http_router_handler_t handler) +{ + gpointer ptr; + rspamd_ftok_t *key; + rspamd_fstring_t *storage; + G_STATIC_ASSERT (sizeof (rspamd_http_router_handler_t) == + sizeof (gpointer)); + + if (path != NULL && handler != NULL && router != NULL) { + memcpy (&ptr, &handler, sizeof (ptr)); + storage = rspamd_fstring_new_init (path, strlen (path)); + key = g_malloc0 (sizeof (*key)); + key->begin = storage->str; + key->len = storage->len; + g_hash_table_insert (router->paths, key, ptr); + } +} + +void +rspamd_http_router_set_unknown_handler (struct rspamd_http_connection_router *router, + rspamd_http_router_handler_t handler) +{ + if (router != NULL) { + router->unknown_method_handler = handler; + } +} + +void +rspamd_http_router_add_header (struct rspamd_http_connection_router *router, + const gchar *name, const gchar *value) +{ + if (name != NULL && value != NULL && router != NULL) { + g_hash_table_replace (router->response_headers, g_strdup (name), + g_strdup (value)); + } +} + +void +rspamd_http_router_insert_headers (struct rspamd_http_connection_router *router, + struct rspamd_http_message *msg) +{ + GHashTableIter it; + gpointer k, v; + + if (router && msg) { + g_hash_table_iter_init (&it, router->response_headers); + + while (g_hash_table_iter_next (&it, &k, &v)) { + rspamd_http_message_add_header (msg, k, v); + } + } +} + +void +rspamd_http_router_add_regexp (struct rspamd_http_connection_router *router, + struct rspamd_regexp_s *re, rspamd_http_router_handler_t handler) +{ + gpointer ptr; + G_STATIC_ASSERT (sizeof (rspamd_http_router_handler_t) == + sizeof (gpointer)); + + if (re != NULL && handler != NULL && router != NULL) { + memcpy (&ptr, &handler, sizeof (ptr)); + rspamd_regexp_set_ud (re, ptr); + g_ptr_array_add (router->regexps, rspamd_regexp_ref (re)); + } +} + +void +rspamd_http_router_handle_socket (struct 
rspamd_http_connection_router *router, + gint fd, gpointer ud) +{ + struct rspamd_http_connection_entry *conn; + + conn = g_malloc0 (sizeof (struct rspamd_http_connection_entry)); + conn->rt = router; + conn->ud = ud; + conn->is_reply = FALSE; + + conn->conn = rspamd_http_connection_new_server (router->ctx, + fd, + NULL, + rspamd_http_router_error_handler, + rspamd_http_router_finish_handler, + 0); + + if (router->key) { + rspamd_http_connection_set_key (conn->conn, router->key); + } + + rspamd_http_connection_read_message (conn->conn, conn, router->timeout); + DL_PREPEND (router->conns, conn); +} + +void +rspamd_http_router_free (struct rspamd_http_connection_router *router) +{ + struct rspamd_http_connection_entry *conn, *tmp; + rspamd_regexp_t *re; + guint i; + + if (router) { + DL_FOREACH_SAFE (router->conns, conn, tmp) { + rspamd_http_entry_free (conn); + } + + if (router->key) { + rspamd_keypair_unref (router->key); + } + + if (router->default_fs_path != NULL) { + g_free (router->default_fs_path); + } + + for (i = 0; i < router->regexps->len; i ++) { + re = g_ptr_array_index (router->regexps, i); + rspamd_regexp_unref (re); + } + + g_ptr_array_free (router->regexps, TRUE); + g_hash_table_unref (router->paths); + g_hash_table_unref (router->response_headers); + g_free (router); + } +}
\ No newline at end of file diff --git a/src/libserver/http/http_router.h b/src/libserver/http/http_router.h new file mode 100644 index 000000000..115ee9b8a --- /dev/null +++ b/src/libserver/http/http_router.h @@ -0,0 +1,149 @@ +/*- + * Copyright 2019 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef RSPAMD_HTTP_ROUTER_H +#define RSPAMD_HTTP_ROUTER_H + +#include "config.h" +#include "http_connection.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_http_connection_router; +struct rspamd_http_connection_entry; + +typedef int (*rspamd_http_router_handler_t) (struct rspamd_http_connection_entry + *conn_ent, + struct rspamd_http_message *msg); + +typedef void (*rspamd_http_router_error_handler_t) (struct rspamd_http_connection_entry *conn_ent, + GError *err); + +typedef void (*rspamd_http_router_finish_handler_t) (struct rspamd_http_connection_entry *conn_ent); + + +struct rspamd_http_connection_entry { + struct rspamd_http_connection_router *rt; + struct rspamd_http_connection *conn; + gpointer ud; + gboolean is_reply; + gboolean support_gzip; + struct rspamd_http_connection_entry *prev, *next; +}; + +struct rspamd_http_connection_router { + struct rspamd_http_connection_entry *conns; + GHashTable *paths; + GHashTable *response_headers; + GPtrArray *regexps; + ev_tstamp timeout; + struct ev_loop *event_loop; + struct rspamd_http_context *ctx; + gchar *default_fs_path; + rspamd_http_router_handler_t 
unknown_method_handler; + struct rspamd_cryptobox_keypair *key; + rspamd_http_router_error_handler_t error_handler; + rspamd_http_router_finish_handler_t finish_handler; +}; + +/** + * Create new http connection router and the associated HTTP connection + * @param eh error handler callback + * @param fh finish handler callback + * @param default_fs_path if not NULL try to serve static files from + * the specified directory + * @return + */ +struct rspamd_http_connection_router *rspamd_http_router_new ( + rspamd_http_router_error_handler_t eh, + rspamd_http_router_finish_handler_t fh, + ev_tstamp timeout, + const char *default_fs_path, + struct rspamd_http_context *ctx); + +/** + * Set encryption key for the HTTP router + * @param router router structure + * @param key opaque key structure + */ +void rspamd_http_router_set_key (struct rspamd_http_connection_router *router, + struct rspamd_cryptobox_keypair *key); + +/** + * Add new path to the router + */ +void rspamd_http_router_add_path (struct rspamd_http_connection_router *router, + const gchar *path, rspamd_http_router_handler_t handler); + +/** + * Add custom header to append to router replies + * @param router + * @param name + * @param value + */ +void rspamd_http_router_add_header (struct rspamd_http_connection_router *router, + const gchar *name, const gchar *value); + +/** + * Sets method to handle unknown request methods + * @param router + * @param handler + */ +void rspamd_http_router_set_unknown_handler (struct rspamd_http_connection_router *router, + rspamd_http_router_handler_t handler); + +/** + * Inserts router headers to the outbound message + * @param router + * @param msg + */ +void rspamd_http_router_insert_headers (struct rspamd_http_connection_router *router, + struct rspamd_http_message *msg); + +struct rspamd_regexp_s; + +/** + * Adds new pattern to router, regexp object is refcounted by this function + * @param router + * @param re + * @param handler + */ +void 
rspamd_http_router_add_regexp (struct rspamd_http_connection_router *router, + struct rspamd_regexp_s *re, rspamd_http_router_handler_t handler); + +/** + * Handle new accepted socket + * @param router router object + * @param fd server socket + * @param ud opaque userdata + */ +void rspamd_http_router_handle_socket ( + struct rspamd_http_connection_router *router, + gint fd, + gpointer ud); + +/** + * Free router and all connections associated + * @param router + */ +void rspamd_http_router_free (struct rspamd_http_connection_router *router); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/libserver/http/http_util.c b/src/libserver/http/http_util.c new file mode 100644 index 000000000..89e74a94b --- /dev/null +++ b/src/libserver/http/http_util.c @@ -0,0 +1,513 @@ +/*- + * Copyright 2019 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "libserver/http/http_util.h" +#include "libutil/printf.h" +#include "libutil/util.h" + +static const gchar *http_week[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; +static const gchar *http_month[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; + +/* + * Obtained from nginx + * Copyright (C) Igor Sysoev + */ +static guint mday[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + +time_t +rspamd_http_parse_date (const gchar *header, gsize len) +{ + const gchar *p, *end; + gint month; + guint day, year, hour, min, sec; + guint64 time; + enum { + no = 0, rfc822, /* Tue, 10 Nov 2002 23:50:13 */ + rfc850, /* Tuesday, 10-Dec-02 23:50:13 */ + isoc /* Tue Dec 10 23:50:13 2002 */ + } fmt; + + fmt = 0; + if (len > 0) { + end = header + len; + } + else { + end = header + strlen (header); + } + + day = 32; + year = 2038; + + for (p = header; p < end; p++) { + if (*p == ',') { + break; + } + + if (*p == ' ') { + fmt = isoc; + break; + } + } + + for (p++; p < end; p++) + if (*p != ' ') { + break; + } + + if (end - p < 18) { + return (time_t)-1; + } + + if (fmt != isoc) { + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { + return (time_t)-1; + } + + day = (*p - '0') * 10 + *(p + 1) - '0'; + p += 2; + + if (*p == ' ') { + if (end - p < 18) { + return (time_t)-1; + } + fmt = rfc822; + + } + else if (*p == '-') { + fmt = rfc850; + + } + else { + return (time_t)-1; + } + + p++; + } + + switch (*p) { + + case 'J': + month = *(p + 1) == 'a' ? 0 : *(p + 2) == 'n' ? 5 : 6; + break; + + case 'F': + month = 1; + break; + + case 'M': + month = *(p + 2) == 'r' ? 2 : 4; + break; + + case 'A': + month = *(p + 1) == 'p' ? 
3 : 7; + break; + + case 'S': + month = 8; + break; + + case 'O': + month = 9; + break; + + case 'N': + month = 10; + break; + + case 'D': + month = 11; + break; + + default: + return (time_t)-1; + } + + p += 3; + + if ((fmt == rfc822 && *p != ' ') || (fmt == rfc850 && *p != '-')) { + return (time_t)-1; + } + + p++; + + if (fmt == rfc822) { + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9' + || *(p + 2) < '0' || *(p + 2) > '9' || *(p + 3) < '0' + || *(p + 3) > '9') { + return (time_t)-1; + } + + year = (*p - '0') * 1000 + (*(p + 1) - '0') * 100 + + (*(p + 2) - '0') * 10 + *(p + 3) - '0'; + p += 4; + + } + else if (fmt == rfc850) { + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { + return (time_t)-1; + } + + year = (*p - '0') * 10 + *(p + 1) - '0'; + year += (year < 70) ? 2000 : 1900; + p += 2; + } + + if (fmt == isoc) { + if (*p == ' ') { + p++; + } + + if (*p < '0' || *p > '9') { + return (time_t)-1; + } + + day = *p++ - '0'; + + if (*p != ' ') { + if (*p < '0' || *p > '9') { + return (time_t)-1; + } + + day = day * 10 + *p++ - '0'; + } + + if (end - p < 14) { + return (time_t)-1; + } + } + + if (*p++ != ' ') { + return (time_t)-1; + } + + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { + return (time_t)-1; + } + + hour = (*p - '0') * 10 + *(p + 1) - '0'; + p += 2; + + if (*p++ != ':') { + return (time_t)-1; + } + + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { + return (time_t)-1; + } + + min = (*p - '0') * 10 + *(p + 1) - '0'; + p += 2; + + if (*p++ != ':') { + return (time_t)-1; + } + + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { + return (time_t)-1; + } + + sec = (*p - '0') * 10 + *(p + 1) - '0'; + + if (fmt == isoc) { + p += 2; + + if (*p++ != ' ') { + return (time_t)-1; + } + + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9' + || *(p + 2) < '0' || *(p + 2) > '9' || *(p + 3) < '0' + || *(p + 3) > '9') { + return (time_t)-1; + } + + year = (*p - '0') * 1000 + (*(p 
+ 1) - '0') * 100 + + (*(p + 2) - '0') * 10 + *(p + 3) - '0'; + } + + if (hour > 23 || min > 59 || sec > 59) { + return (time_t)-1; + } + + if (day == 29 && month == 1) { + if ((year & 3) || ((year % 100 == 0) && (year % 400) != 0)) { + return (time_t)-1; + } + + } + else if (day > mday[month]) { + return (time_t)-1; + } + + /* + * shift new year to March 1 and start months from 1 (not 0), + * it is needed for Gauss' formula + */ + + if (--month <= 0) { + month += 12; + year -= 1; + } + + /* Gauss' formula for Gregorian days since March 1, 1 BC */ + + time = (guint64) ( + /* days in years including leap years since March 1, 1 BC */ + + 365 * year + year / 4 - year / 100 + year / 400 + + /* days before the month */ + + + 367 * month / 12 - 30 + + /* days before the day */ + + + day - 1 + + /* + * 719527 days were between March 1, 1 BC and March 1, 1970, + * 31 and 28 days were in January and February 1970 + */ + + - 719527 + 31 + 28) * 86400 + hour * 3600 + min * 60 + sec; + + return (time_t) time; +} + +glong +rspamd_http_date_format (gchar *buf, gsize len, time_t time) +{ + struct tm tms; + + rspamd_gmtime (time, &tms); + + return rspamd_snprintf (buf, len, "%s, %02d %s %4d %02d:%02d:%02d GMT", + http_week[tms.tm_wday], tms.tm_mday, + http_month[tms.tm_mon], tms.tm_year + 1900, + tms.tm_hour, tms.tm_min, tms.tm_sec); +} + +void +rspamd_http_normalize_path_inplace (gchar *path, guint len, guint *nlen) +{ + const gchar *p, *end, *slash = NULL, *dot = NULL; + gchar *o; + enum { + st_normal = 0, + st_got_dot, + st_got_dot_dot, + st_got_slash, + st_got_slash_slash, + } state = st_normal; + + p = path; + end = path + len; + o = path; + + while (p < end) { + switch (state) { + case st_normal: + if (G_UNLIKELY (*p == '/')) { + state = st_got_slash; + slash = p; + } + else if (G_UNLIKELY (*p == '.')) { + state = st_got_dot; + dot = p; + } + else { + *o++ = *p; + } + p ++; + break; + case st_got_slash: + if (G_UNLIKELY (*p == '/')) { + /* Ignore double slash */ + *o++ = *p; 
+ state = st_got_slash_slash; + } + else if (G_UNLIKELY (*p == '.')) { + dot = p; + state = st_got_dot; + } + else { + *o++ = '/'; + *o++ = *p; + slash = NULL; + dot = NULL; + state = st_normal; + } + p ++; + break; + case st_got_slash_slash: + if (G_LIKELY (*p != '/')) { + slash = p - 1; + dot = NULL; + state = st_normal; + continue; + } + p ++; + break; + case st_got_dot: + if (G_UNLIKELY (*p == '/')) { + /* Remove any /./ or ./ paths */ + if (((o > path && *(o - 1) != '/') || (o == path)) && slash) { + /* Preserve one slash */ + *o++ = '/'; + } + + slash = p; + dot = NULL; + /* Ignore last slash */ + state = st_normal; + } + else if (*p == '.') { + /* Double dot character */ + state = st_got_dot_dot; + } + else { + /* We have something like .some or /.some */ + if (dot && p > dot) { + if (slash == dot - 1 && (o > path && *(o - 1) != '/')) { + /* /.blah */ + memmove (o, slash, p - slash); + o += p - slash; + } + else { + memmove (o, dot, p - dot); + o += p - dot; + } + } + + slash = NULL; + dot = NULL; + state = st_normal; + continue; + } + + p ++; + break; + case st_got_dot_dot: + if (*p == '/') { + /* We have something like /../ or ../ */ + if (slash) { + /* We need to remove the last component from o if it is there */ + if (o > path + 2 && *(o - 1) == '/') { + slash = rspamd_memrchr (path, '/', o - path - 2); + } + else if (o > path + 1) { + slash = rspamd_memrchr (path, '/', o - path - 1); + } + else { + slash = NULL; + } + + if (slash) { + o = (gchar *)slash; + } + /* Otherwise we keep these dots */ + slash = p; + state = st_got_slash; + } + else { + /* We have something like bla../, so we need to copy it as is */ + if (o > path && dot && p > dot) { + memmove (o, dot, p - dot); + o += p - dot; + } + + slash = NULL; + dot = NULL; + state = st_normal; + continue; + } + } + else { + /* We have something like ..bla or ... 
*/ + if (slash) { + *o ++ = '/'; + } + + if (dot && p > dot) { + memmove (o, dot, p - dot); + o += p - dot; + } + + slash = NULL; + dot = NULL; + state = st_normal; + continue; + } + + p ++; + break; + } + } + + /* Leftover */ + switch (state) { + case st_got_dot_dot: + /* Trailing .. */ + if (slash) { + /* We need to remove the last component from o if it is there */ + if (o > path + 2 && *(o - 1) == '/') { + slash = rspamd_memrchr (path, '/', o - path - 2); + } + else if (o > path + 1) { + slash = rspamd_memrchr (path, '/', o - path - 1); + } + else { + if (o == path) { + /* Corner case */ + *o++ = '/'; + } + + slash = NULL; + } + + if (slash) { + /* Remove last / */ + o = (gchar *)slash; + } + } + else { + /* Corner case */ + if (o == path) { + *o++ = '/'; + } + else { + if (dot && p > dot) { + memmove (o, dot, p - dot); + o += p - dot; + } + } + } + break; + case st_got_slash: + *o++ = '/'; + break; + default: + if (o > path + 1 && *(o - 1) == '/') { + o --; + } + break; + } + + if (nlen) { + *nlen = (o - path); + } +}
\ No newline at end of file diff --git a/src/libserver/http/http_util.h b/src/libserver/http/http_util.h new file mode 100644 index 000000000..7a22ffb16 --- /dev/null +++ b/src/libserver/http/http_util.h @@ -0,0 +1,56 @@ +/*- + * Copyright 2019 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSPAMD_HTTP_UTIL_H +#define RSPAMD_HTTP_UTIL_H + +#include "config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Parse HTTP date header and return it as time_t + * @param header HTTP date header + * @param len length of header + * @return time_t or (time_t)-1 in case of error + */ +time_t rspamd_http_parse_date (const gchar *header, gsize len); + +/** + * Prints HTTP date from `time` to `buf` using standard HTTP date format + * @param buf date buffer + * @param len length of buffer + * @param time time in unix seconds + * @return number of bytes written + */ +glong rspamd_http_date_format (gchar *buf, gsize len, time_t time); + +/** + * Normalize HTTP path removing dot sequences and repeating '/' symbols as + * per rfc3986#section-5.2 + * @param path + * @param len + * @param nlen + */ +void rspamd_http_normalize_path_inplace (gchar *path, guint len, guint *nlen); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/libserver/logger/logger.c b/src/libserver/logger/logger.c index 568c0b8e9..4e48eae4e 100644 --- a/src/libserver/logger/logger.c +++ b/src/libserver/logger/logger.c @@ -16,8 +16,8 @@ #include "config.h" 
#include "logger.h" #include "rspamd.h" -#include "map.h" -#include "map_helpers.h" +#include "libserver/maps/map.h" +#include "libserver/maps/map_helpers.h" #include "ottery.h" #include "unix-std.h" #include "logger_private.h" diff --git a/src/libserver/logger/logger_file.c b/src/libserver/logger/logger_file.c index 9a242ac61..d8ce15de2 100644 --- a/src/libserver/logger/logger_file.c +++ b/src/libserver/logger/logger_file.c @@ -89,7 +89,6 @@ direct_write_log_line (rspamd_logger_t *rspamd_log, gboolean is_iov, gint level_flags) { - gchar errmsg[128]; struct iovec *iov; const gchar *line; glong r; diff --git a/src/libserver/maps/map.c b/src/libserver/maps/map.c new file mode 100644 index 000000000..ff3a38f90 --- /dev/null +++ b/src/libserver/maps/map.c @@ -0,0 +1,2923 @@ +/*- + * Copyright 2019 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * Implementation of map files handling + */ + +#include "config.h" +#include "map.h" +#include "map_private.h" +#include "libserver/http/http_connection.h" +#include "libserver/http/http_private.h" +#include "rspamd.h" +#include "contrib/zstd/zstd.h" +#include "contrib/libev/ev.h" +#include "contrib/uthash/utlist.h" + +#undef MAP_DEBUG_REFS +#ifdef MAP_DEBUG_REFS +#define MAP_RETAIN(x, t) do { \ + msg_err (G_GNUC_PRETTY_FUNCTION ": " t ": retain ref %p, refcount: %d -> %d", (x), (x)->ref.refcount, (x)->ref.refcount + 1); \ + REF_RETAIN(x); \ +} while (0) + +#define MAP_RELEASE(x, t) do { \ + msg_err (G_GNUC_PRETTY_FUNCTION ": " t ": release ref %p, refcount: %d -> %d", (x), (x)->ref.refcount, (x)->ref.refcount - 1); \ + REF_RELEASE(x); \ +} while (0) +#else +#define MAP_RETAIN(x, t) REF_RETAIN(x) +#define MAP_RELEASE(x, t) REF_RELEASE(x) +#endif + +enum rspamd_map_periodic_opts { + RSPAMD_MAP_SCHEDULE_NORMAL = 0, + RSPAMD_MAP_SCHEDULE_ERROR = (1u << 0u), + RSPAMD_MAP_SCHEDULE_LOCKED = (1u << 1u), + RSPAMD_MAP_SCHEDULE_INIT = (1u << 2u), +}; + +static void free_http_cbdata_common (struct http_callback_data *cbd, + gboolean plan_new); +static void free_http_cbdata_dtor (gpointer p); +static void free_http_cbdata (struct http_callback_data *cbd); +static void rspamd_map_process_periodic (struct map_periodic_cbdata *cbd); +static void rspamd_map_schedule_periodic (struct rspamd_map *map, int how); +static gboolean read_map_file_chunks (struct rspamd_map *map, + struct map_cb_data *cbdata, + const gchar *fname, + gsize len, + goffset off); +static gboolean rspamd_map_save_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata, + const guchar *data, + gsize len); +static gboolean rspamd_map_update_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata); + +guint rspamd_map_log_id = (guint)-1; +RSPAMD_CONSTRUCTOR(rspamd_map_log_init) +{ + rspamd_map_log_id = 
rspamd_logger_add_debug_module("map"); +} + +/** + * Write HTTP request + */ +static void +write_http_request (struct http_callback_data *cbd) +{ + gchar datebuf[128]; + struct rspamd_http_message *msg; + + msg = rspamd_http_new_message (HTTP_REQUEST); + + if (cbd->bk->protocol == MAP_PROTO_HTTPS) { + msg->flags |= RSPAMD_HTTP_FLAG_SSL; + } + + if (cbd->check) { + msg->method = HTTP_HEAD; + } + + msg->url = rspamd_fstring_append (msg->url, + cbd->data->path, strlen (cbd->data->path)); + + if (cbd->check) { + if (cbd->data->last_modified != 0) { + rspamd_http_date_format (datebuf, sizeof (datebuf), + cbd->data->last_modified); + rspamd_http_message_add_header (msg, "If-Modified-Since", + datebuf); + } + if (cbd->data->etag) { + rspamd_http_message_add_header_len (msg, "If-None-Match", + cbd->data->etag->str, cbd->data->etag->len); + } + } + + msg->url = rspamd_fstring_append (msg->url, cbd->data->rest, + strlen (cbd->data->rest)); + + if (cbd->data->userinfo) { + rspamd_http_message_add_header (msg, "Authorization", + cbd->data->userinfo); + } + + MAP_RETAIN (cbd, "http_callback_data"); + rspamd_http_connection_write_message (cbd->conn, + msg, + cbd->data->host, + NULL, + cbd, + cbd->timeout); +} + +/** + * Callback for destroying HTTP callback data + */ +static void +free_http_cbdata_common (struct http_callback_data *cbd, gboolean plan_new) +{ + struct map_periodic_cbdata *periodic = cbd->periodic; + + if (cbd->shmem_data) { + rspamd_http_message_shmem_unref (cbd->shmem_data); + } + + if (cbd->pk) { + rspamd_pubkey_unref (cbd->pk); + } + + if (cbd->conn) { + rspamd_http_connection_unref (cbd->conn); + cbd->conn = NULL; + } + + if (cbd->addrs) { + rspamd_inet_addr_t *addr; + guint i; + + PTR_ARRAY_FOREACH (cbd->addrs, i, addr) { + rspamd_inet_address_free (addr); + } + + g_ptr_array_free (cbd->addrs, TRUE); + } + + + MAP_RELEASE (cbd->bk, "rspamd_map_backend"); + + if (periodic) { + /* Detached in case of HTTP error */ + MAP_RELEASE (periodic, "periodic"); + } + + 
g_free (cbd); +} + +static void +free_http_cbdata (struct http_callback_data *cbd) +{ + cbd->map->tmp_dtor = NULL; + cbd->map->tmp_dtor_data = NULL; + + free_http_cbdata_common (cbd, TRUE); +} + +static void +free_http_cbdata_dtor (gpointer p) +{ + struct http_callback_data *cbd = p; + struct rspamd_map *map; + + map = cbd->map; + if (cbd->stage == http_map_http_conn) { + REF_RELEASE (cbd); + } + else { + /* We cannot terminate DNS requests sent */ + cbd->stage = http_map_terminated; + } + + msg_warn_map ("%s: " + "connection with http server is terminated: worker is stopping", + map->name); +} + +/* + * HTTP callbacks + */ +static void +http_map_error (struct rspamd_http_connection *conn, + GError *err) +{ + struct http_callback_data *cbd = conn->ud; + struct rspamd_map *map; + + map = cbd->map; + + if (cbd->periodic) { + cbd->periodic->errored = TRUE; + msg_err_map ("error reading %s(%s): " + "connection with http server terminated incorrectly: %e", + cbd->bk->uri, + cbd->addr ? rspamd_inet_address_to_string_pretty (cbd->addr) : "", + err); + + rspamd_map_process_periodic (cbd->periodic); + } + + MAP_RELEASE (cbd, "http_callback_data"); +} + +static void +rspamd_map_cache_cb (struct ev_loop *loop, ev_timer *w, int revents) +{ + struct rspamd_http_map_cached_cbdata *cache_cbd = (struct rspamd_http_map_cached_cbdata *) + w->data; + struct rspamd_map *map; + struct http_map_data *data; + + map = cache_cbd->map; + data = cache_cbd->data; + + if (cache_cbd->gen != cache_cbd->data->gen) { + /* We have another update, so this cache element is obviously expired */ + /* + * Important!: we do not set cache availability to zero here, as there + * might be fresh cache + */ + msg_info_map ("cached data is now expired (gen mismatch %L != %L) for %s", + cache_cbd->gen, cache_cbd->data->gen, map->name); + MAP_RELEASE (cache_cbd->shm, "rspamd_http_map_cached_cbdata"); + ev_timer_stop (loop, &cache_cbd->timeout); + g_free (cache_cbd); + } + else if (cache_cbd->data->last_checked 
>= cache_cbd->last_checked) { + /* + * We checked map but we have not found anything more recent, + * reschedule cache check + */ + if (cache_cbd->map->poll_timeout > + rspamd_get_calendar_ticks () - cache_cbd->data->last_checked) { + w->repeat = cache_cbd->map->poll_timeout - + (rspamd_get_calendar_ticks () - cache_cbd->data->last_checked); + } + else { + w->repeat = cache_cbd->map->poll_timeout; + } + + cache_cbd->last_checked = cache_cbd->data->last_checked; + msg_debug_map ("cached data is up to date for %s", map->name); + ev_timer_again (loop, &cache_cbd->timeout); + } + else { + data->cur_cache_cbd = NULL; + g_atomic_int_set (&data->cache->available, 0); + MAP_RELEASE (cache_cbd->shm, "rspamd_http_map_cached_cbdata"); + msg_info_map ("cached data is now expired for %s", map->name); + ev_timer_stop (loop, &cache_cbd->timeout); + g_free (cache_cbd); + } +} + +static int +http_map_finish (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg) +{ + struct http_callback_data *cbd = conn->ud; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + struct http_map_data *data; + struct rspamd_http_map_cached_cbdata *cache_cbd; + const rspamd_ftok_t *expires_hdr, *etag_hdr; + char next_check_date[128]; + guchar *in = NULL; + gsize dlen = 0; + + map = cbd->map; + bk = cbd->bk; + data = bk->data.hd; + + if (msg->code == 200) { + + if (cbd->check) { + msg_info_map ("need to reread map from %s", cbd->bk->uri); + cbd->periodic->need_modify = TRUE; + /* Reset the whole chain */ + cbd->periodic->cur_backend = 0; + /* Reset cache, old cached data will be cleaned on timeout */ + g_atomic_int_set (&data->cache->available, 0); + data->cur_cache_cbd = NULL; + + rspamd_map_process_periodic (cbd->periodic); + MAP_RELEASE (cbd, "http_callback_data"); + + return 0; + } + + cbd->data->last_checked = msg->date; + + if (msg->last_modified) { + cbd->data->last_modified = msg->last_modified; + } + else { + cbd->data->last_modified = msg->date; + } + + + /* Unsigned 
version - just open file */ + cbd->shmem_data = rspamd_http_message_shmem_ref (msg); + cbd->data_len = msg->body_buf.len; + + if (cbd->data_len == 0) { + msg_err_map ("cannot read empty map"); + goto err; + } + + g_assert (cbd->shmem_data != NULL); + + in = rspamd_shmem_xmap (cbd->shmem_data->shm_name, PROT_READ, &dlen); + + if (in == NULL) { + msg_err_map ("cannot read tempfile %s: %s", + cbd->shmem_data->shm_name, + strerror (errno)); + goto err; + } + + /* Check for expires */ + double cached_timeout = map->poll_timeout * 2; + + expires_hdr = rspamd_http_message_find_header (msg, "Expires"); + + if (expires_hdr) { + time_t hdate; + + hdate = rspamd_http_parse_date (expires_hdr->begin, expires_hdr->len); + + if (hdate != (time_t)-1 && hdate > msg->date) { + cached_timeout = map->next_check - msg->date + + map->poll_timeout * 2; + + map->next_check = hdate; + } + } + + /* Check for etag */ + etag_hdr = rspamd_http_message_find_header (msg, "ETag"); + + if (etag_hdr) { + if (cbd->data->etag) { + /* Remove old etag */ + rspamd_fstring_free (cbd->data->etag); + } + + cbd->data->etag = rspamd_fstring_new_init (etag_hdr->begin, + etag_hdr->len); + } + else { + if (cbd->data->etag) { + /* Remove and clear old etag */ + rspamd_fstring_free (cbd->data->etag); + cbd->data->etag = NULL; + } + } + + MAP_RETAIN (cbd->shmem_data, "shmem_data"); + cbd->data->gen ++; + /* + * We know that a map is in the locked state + */ + g_atomic_int_set (&data->cache->available, 1); + /* Store cached data */ + rspamd_strlcpy (data->cache->shmem_name, cbd->shmem_data->shm_name, + sizeof (data->cache->shmem_name)); + data->cache->len = cbd->data_len; + data->cache->last_modified = cbd->data->last_modified; + cache_cbd = g_malloc0 (sizeof (*cache_cbd)); + cache_cbd->shm = cbd->shmem_data; + cache_cbd->event_loop = cbd->event_loop; + cache_cbd->map = map; + cache_cbd->data = cbd->data; + cache_cbd->last_checked = cbd->data->last_checked; + cache_cbd->gen = cbd->data->gen; + MAP_RETAIN 
(cache_cbd->shm, "shmem_data"); + + ev_timer_init (&cache_cbd->timeout, rspamd_map_cache_cb, cached_timeout, + 0.0); + ev_timer_start (cbd->event_loop, &cache_cbd->timeout); + cache_cbd->timeout.data = cache_cbd; + data->cur_cache_cbd = cache_cbd; + + if (map->next_check) { + rspamd_http_date_format (next_check_date, sizeof (next_check_date), + map->next_check); + } + else { + rspamd_http_date_format (next_check_date, sizeof (next_check_date), + rspamd_get_calendar_ticks () + map->poll_timeout); + } + + + if (cbd->bk->is_compressed) { + ZSTD_DStream *zstream; + ZSTD_inBuffer zin; + ZSTD_outBuffer zout; + guchar *out; + gsize outlen, r; + + zstream = ZSTD_createDStream (); + ZSTD_initDStream (zstream); + + zin.pos = 0; + zin.src = in; + zin.size = dlen; + + if ((outlen = ZSTD_getDecompressedSize (zin.src, zin.size)) == 0) { + outlen = ZSTD_DStreamOutSize (); + } + + out = g_malloc (outlen); + + zout.dst = out; + zout.pos = 0; + zout.size = outlen; + + while (zin.pos < zin.size) { + r = ZSTD_decompressStream (zstream, &zout, &zin); + + if (ZSTD_isError (r)) { + msg_err_map ("%s(%s): cannot decompress data: %s", + cbd->bk->uri, + rspamd_inet_address_to_string_pretty (cbd->addr), + ZSTD_getErrorName (r)); + ZSTD_freeDStream (zstream); + g_free (out); + MAP_RELEASE (cbd->shmem_data, "shmem_data"); + goto err; + } + + if (zout.pos == zout.size) { + /* We need to extend output buffer */ + zout.size = zout.size * 2 + 1.0; + out = g_realloc (zout.dst, zout.size); + zout.dst = out; + } + } + + ZSTD_freeDStream (zstream); + msg_info_map ("%s(%s): read map data %z bytes compressed, " + "%z uncompressed, next check at %s", + cbd->bk->uri, + rspamd_inet_address_to_string_pretty (cbd->addr), + dlen, zout.pos, next_check_date); + map->read_callback (out, zout.pos, &cbd->periodic->cbdata, TRUE); + rspamd_map_save_http_cached_file (map, bk, cbd->data, out, zout.pos); + g_free (out); + } + else { + msg_info_map ("%s(%s): read map data %z bytes, next check at %s", + cbd->bk->uri, + 
rspamd_inet_address_to_string_pretty (cbd->addr), + dlen, next_check_date); + rspamd_map_save_http_cached_file (map, bk, cbd->data, in, cbd->data_len); + map->read_callback (in, cbd->data_len, &cbd->periodic->cbdata, TRUE); + } + + MAP_RELEASE (cbd->shmem_data, "shmem_data"); + + cbd->periodic->cur_backend ++; + munmap (in, dlen); + rspamd_map_process_periodic (cbd->periodic); + } + else if (msg->code == 304 && cbd->check) { + cbd->data->last_checked = msg->date; + + if (msg->last_modified) { + cbd->data->last_modified = msg->last_modified; + } + else { + cbd->data->last_modified = msg->date; + } + + expires_hdr = rspamd_http_message_find_header (msg, "Expires"); + + if (expires_hdr) { + time_t hdate; + + hdate = rspamd_http_parse_date (expires_hdr->begin, expires_hdr->len); + if (hdate != (time_t)-1 && hdate > msg->date) { + map->next_check = hdate; + } + } + + etag_hdr = rspamd_http_message_find_header (msg, "ETag"); + + if (etag_hdr) { + if (cbd->data->etag) { + /* Remove old etag */ + rspamd_fstring_free (cbd->data->etag); + cbd->data->etag = rspamd_fstring_new_init (etag_hdr->begin, + etag_hdr->len); + } + } + + if (map->next_check) { + rspamd_http_date_format (next_check_date, sizeof (next_check_date), + map->next_check); + msg_info_map ("data is not modified for server %s, next check at %s " + "(http cache based)", + cbd->data->host, next_check_date); + } + else { + rspamd_http_date_format (next_check_date, sizeof (next_check_date), + rspamd_get_calendar_ticks () + map->poll_timeout); + msg_info_map ("data is not modified for server %s, next check at %s " + "(timer based)", + cbd->data->host, next_check_date); + } + + rspamd_map_update_http_cached_file (map, bk, cbd->data); + cbd->periodic->cur_backend ++; + rspamd_map_process_periodic (cbd->periodic); + } + else { + msg_info_map ("cannot load map %s from %s: HTTP error %d", + bk->uri, cbd->data->host, msg->code); + goto err; + } + + MAP_RELEASE (cbd, "http_callback_data"); + return 0; + +err: + 
cbd->periodic->errored = 1; + rspamd_map_process_periodic (cbd->periodic); + MAP_RELEASE (cbd, "http_callback_data"); + + return 0; +} + +static gboolean +read_map_file_chunks (struct rspamd_map *map, struct map_cb_data *cbdata, + const gchar *fname, gsize len, goffset off) +{ + gint fd; + gssize r, avail; + gsize buflen = 1024 * 1024; + gchar *pos, *bytes; + + fd = rspamd_file_xopen (fname, O_RDONLY, 0, TRUE); + + if (fd == -1) { + msg_err_map ("can't open map for buffered reading %s: %s", + fname, strerror (errno)); + return FALSE; + } + + if (lseek (fd, off, SEEK_SET) == -1) { + msg_err_map ("can't seek in map to pos %d for buffered reading %s: %s", + (gint)off, fname, strerror (errno)); + return FALSE; + } + + buflen = MIN (len, buflen); + bytes = g_malloc (buflen); + avail = buflen; + pos = bytes; + + while ((r = read (fd, pos, avail)) > 0) { + gchar *end = bytes + (pos - bytes) + r; + msg_debug_map ("%s: read map chunk, %z bytes", fname, + r); + pos = map->read_callback (bytes, end - bytes, cbdata, r == len); + + if (pos && pos > bytes && pos < end) { + guint remain = end - pos; + + memmove (bytes, pos, remain); + pos = bytes + remain; + /* Need to preserve the remain */ + avail = ((gssize)buflen) - remain; + + if (avail <= 0) { + /* Try realloc, too large element */ + g_assert (buflen >= remain); + bytes = g_realloc (bytes, buflen * 2); + + pos = bytes + remain; /* Adjust */ + avail += buflen; + buflen *= 2; + } + } + else { + avail = buflen; + pos = bytes; + } + + len -= r; + } + + if (r == -1) { + msg_err_map ("can't read from map %s: %s", fname, strerror (errno)); + close (fd); + g_free (bytes); + + return FALSE; + } + + close (fd); + g_free (bytes); + + return TRUE; +} + +static gboolean +rspamd_map_check_sig_pk_mem (const guchar *sig, + gsize siglen, + struct rspamd_map *map, + const guchar *input, + gsize inlen, + struct rspamd_cryptobox_pubkey *pk) +{ + GString *b32_key; + gboolean ret = TRUE; + + if (siglen != rspamd_cryptobox_signature_bytes 
(RSPAMD_CRYPTOBOX_MODE_25519)) { + msg_err_map ("can't open signature for %s: invalid size: %z", map->name, siglen); + + ret = FALSE; + } + + if (ret && !rspamd_cryptobox_verify (sig, siglen, input, inlen, + rspamd_pubkey_get_pk (pk, NULL), RSPAMD_CRYPTOBOX_MODE_25519)) { + msg_err_map ("can't verify signature for %s: incorrect signature", map->name); + + ret = FALSE; + } + + if (ret) { + b32_key = rspamd_pubkey_print (pk, + RSPAMD_KEYPAIR_BASE32 | RSPAMD_KEYPAIR_PUBKEY); + msg_info_map ("verified signature for %s using trusted key %v", + map->name, b32_key); + g_string_free (b32_key, TRUE); + } + + return ret; +} + +static gboolean +rspamd_map_check_file_sig (const char *fname, + struct rspamd_map *map, + struct rspamd_map_backend *bk, + const guchar *input, + gsize inlen) { + guchar *data; + struct rspamd_cryptobox_pubkey *pk = NULL; + GString *b32_key; + gboolean ret = TRUE; + gsize len = 0; + gchar fpath[PATH_MAX]; + + if (bk->trusted_pubkey == NULL) { + /* Try to load and check pubkey */ + rspamd_snprintf (fpath, sizeof (fpath), "%s.pub", fname); + data = rspamd_file_xmap (fpath, PROT_READ, &len, TRUE); + + if (data == NULL) { + msg_err_map ("can't open pubkey %s: %s", fpath, strerror (errno)); + return FALSE; + } + + pk = rspamd_pubkey_from_base32 (data, len, RSPAMD_KEYPAIR_SIGN, + RSPAMD_CRYPTOBOX_MODE_25519); + munmap (data, len); + + if (pk == NULL) { + msg_err_map ("can't load pubkey %s", fpath); + return FALSE; + } + + /* We just check pk against the trusted db of keys */ + b32_key = rspamd_pubkey_print (pk, + RSPAMD_KEYPAIR_BASE32 | RSPAMD_KEYPAIR_PUBKEY); + g_assert (b32_key != NULL); + + if (g_hash_table_lookup (map->cfg->trusted_keys, b32_key->str) == NULL) { + msg_err_map ("pubkey loaded from %s is untrusted: %v", fpath, + b32_key); + g_string_free (b32_key, TRUE); + rspamd_pubkey_unref (pk); + + return FALSE; + } + + g_string_free (b32_key, TRUE); + } + else { + pk = rspamd_pubkey_ref (bk->trusted_pubkey); + } + + rspamd_snprintf (fpath, sizeof 
(fpath), "%s.sig", fname); + data = rspamd_shmem_xmap (fpath, PROT_READ, &len); + + if (data == NULL) { + msg_err_map ("can't open signature %s: %s", fpath, strerror (errno)); + ret = FALSE; + } + + if (ret) { + ret = rspamd_map_check_sig_pk_mem (data, len, map, input, inlen, pk); + munmap (data, len); + } + + rspamd_pubkey_unref (pk); + + return ret; +} + +/** + * Callback for reading data from file + */ +static gboolean +read_map_file (struct rspamd_map *map, struct file_map_data *data, + struct rspamd_map_backend *bk, struct map_periodic_cbdata *periodic) +{ + gchar *bytes; + gsize len; + struct stat st; + + if (map->read_callback == NULL || map->fin_callback == NULL) { + msg_err_map ("%s: bad callback for reading map file", + data->filename); + return FALSE; + } + + if (stat (data->filename, &st) == -1) { + /* File does not exist, skipping */ + if (errno != ENOENT) { + msg_err_map ("%s: map file is unavailable for reading: %s", + data->filename, strerror (errno)); + + return FALSE; + } + else { + msg_info_map ("%s: map file is not found; " + "it will be read automatically if created", + data->filename); + return TRUE; + } + } + + ev_stat_stat (map->event_loop, &data->st_ev); + len = st.st_size; + + if (bk->is_signed) { + bytes = rspamd_file_xmap (data->filename, PROT_READ, &len, TRUE); + + if (bytes == NULL) { + msg_err_map ("can't open map %s: %s", data->filename, strerror (errno)); + return FALSE; + } + + if (!rspamd_map_check_file_sig (data->filename, map, bk, bytes, len)) { + munmap (bytes, len); + + return FALSE; + } + + munmap (bytes, len); + } + + if (len > 0) { + if (bk->is_compressed) { + bytes = rspamd_file_xmap (data->filename, PROT_READ, &len, TRUE); + + if (bytes == NULL) { + msg_err_map ("can't open map %s: %s", data->filename, strerror (errno)); + return FALSE; + } + + ZSTD_DStream *zstream; + ZSTD_inBuffer zin; + ZSTD_outBuffer zout; + guchar *out; + gsize outlen, r; + + zstream = ZSTD_createDStream (); + ZSTD_initDStream (zstream); + + zin.pos 
= 0; + zin.src = bytes; + zin.size = len; + + if ((outlen = ZSTD_getDecompressedSize (zin.src, zin.size)) == 0) { + outlen = ZSTD_DStreamOutSize (); + } + + out = g_malloc (outlen); + + zout.dst = out; + zout.pos = 0; + zout.size = outlen; + + while (zin.pos < zin.size) { + r = ZSTD_decompressStream (zstream, &zout, &zin); + + if (ZSTD_isError (r)) { + msg_err_map ("%s: cannot decompress data: %s", + data->filename, + ZSTD_getErrorName (r)); + ZSTD_freeDStream (zstream); + g_free (out); + munmap (bytes, len); + return FALSE; + } + + if (zout.pos == zout.size) { + /* We need to extend output buffer */ + zout.size = zout.size * 2 + 1; + out = g_realloc (zout.dst, zout.size); + zout.dst = out; + } + } + + ZSTD_freeDStream (zstream); + msg_info_map ("%s: read map data, %z bytes compressed, " + "%z uncompressed)", data->filename, + len, zout.pos); + map->read_callback (out, zout.pos, &periodic->cbdata, TRUE); + g_free (out); + + munmap (bytes, len); + } + else { + /* Perform buffered read: fail-safe */ + if (!read_map_file_chunks (map, &periodic->cbdata, data->filename, + len, 0)) { + return FALSE; + } + } + } + else { + /* Empty map */ + map->read_callback (NULL, 0, &periodic->cbdata, TRUE); + } + + return TRUE; +} + +static gboolean +read_map_static (struct rspamd_map *map, struct static_map_data *data, + struct rspamd_map_backend *bk, struct map_periodic_cbdata *periodic) +{ + guchar *bytes; + gsize len; + + if (map->read_callback == NULL || map->fin_callback == NULL) { + msg_err_map ("%s: bad callback for reading map file", map->name); + data->processed = TRUE; + return FALSE; + } + + bytes = data->data; + len = data->len; + + if (len > 0) { + if (bk->is_compressed) { + ZSTD_DStream *zstream; + ZSTD_inBuffer zin; + ZSTD_outBuffer zout; + guchar *out; + gsize outlen, r; + + zstream = ZSTD_createDStream (); + ZSTD_initDStream (zstream); + + zin.pos = 0; + zin.src = bytes; + zin.size = len; + + if ((outlen = ZSTD_getDecompressedSize (zin.src, zin.size)) == 0) { + 
outlen = ZSTD_DStreamOutSize (); + } + + out = g_malloc (outlen); + + zout.dst = out; + zout.pos = 0; + zout.size = outlen; + + while (zin.pos < zin.size) { + r = ZSTD_decompressStream (zstream, &zout, &zin); + + if (ZSTD_isError (r)) { + msg_err_map ("%s: cannot decompress data: %s", + map->name, + ZSTD_getErrorName (r)); + ZSTD_freeDStream (zstream); + g_free (out); + + return FALSE; + } + + if (zout.pos == zout.size) { + /* We need to extend output buffer */ + zout.size = zout.size * 2 + 1; + out = g_realloc (zout.dst, zout.size); + zout.dst = out; + } + } + + ZSTD_freeDStream (zstream); + msg_info_map ("%s: read map data, %z bytes compressed, " + "%z uncompressed)", + map->name, + len, zout.pos); + map->read_callback (out, zout.pos, &periodic->cbdata, TRUE); + g_free (out); + } + else { + msg_info_map ("%s: read map data, %z bytes", + map->name, len); + map->read_callback (bytes, len, &periodic->cbdata, TRUE); + } + } + else { + map->read_callback (NULL, 0, &periodic->cbdata, TRUE); + } + + data->processed = TRUE; + + return TRUE; +} + +static void +rspamd_map_periodic_dtor (struct map_periodic_cbdata *periodic) +{ + struct rspamd_map *map; + + map = periodic->map; + msg_debug_map ("periodic dtor %p", periodic); + + if (periodic->need_modify) { + /* We are done */ + periodic->map->fin_callback (&periodic->cbdata, periodic->map->user_data); + } + else { + /* Not modified */ + } + + if (periodic->locked) { + g_atomic_int_set (periodic->map->locked, 0); + msg_debug_map ("unlocked map %s", periodic->map->name); + + if (periodic->map->wrk->state == rspamd_worker_state_running) { + rspamd_map_schedule_periodic (periodic->map, + RSPAMD_SYMBOL_RESULT_NORMAL); + } + else { + msg_debug_map ("stop scheduling periodics for %s; terminating state", + periodic->map->name); + } + } + + g_free (periodic); +} + +/* Called on timer execution */ +static void +rspamd_map_periodic_callback (struct ev_loop *loop, ev_timer *w, int revents) +{ + struct map_periodic_cbdata *cbd = 
(struct map_periodic_cbdata *)w->data; + + MAP_RETAIN (cbd, "periodic"); + ev_timer_stop (loop, w); + rspamd_map_process_periodic (cbd); + MAP_RELEASE (cbd, "periodic"); +} + +static void +rspamd_map_schedule_periodic (struct rspamd_map *map, int how) +{ + const gdouble error_mult = 20.0, lock_mult = 0.1; + static const gdouble min_timer_interval = 2.0; + const gchar *reason = "unknown reason"; + gdouble jittered_sec; + gdouble timeout; + struct map_periodic_cbdata *cbd; + + if (map->scheduled_check || (map->wrk && + map->wrk->state != rspamd_worker_state_running)) { + /* + * Do not schedule check if some check is already scheduled or + * if worker is going to die + */ + return; + } + + if (!(how & RSPAMD_MAP_SCHEDULE_INIT) && map->static_only) { + /* No need to schedule anything for static maps */ + return; + } + + if (map->non_trivial && map->next_check != 0) { + timeout = map->next_check - rspamd_get_calendar_ticks (); + + if (timeout > 0 && timeout < map->poll_timeout) { + /* Early check case, jitter */ + gdouble poll_timeout = map->poll_timeout; + + if (how & RSPAMD_MAP_SCHEDULE_ERROR) { + poll_timeout = map->poll_timeout * error_mult; + reason = "early active non-trivial check (after error)"; + } + else if (how & RSPAMD_MAP_SCHEDULE_LOCKED) { + poll_timeout = map->poll_timeout * lock_mult; + reason = "early active non-trivial check (after being locked)"; + } + else { + reason = "early active non-trivial check"; + } + + jittered_sec = MIN (timeout, poll_timeout); + + } + else if (timeout <= 0) { + /* Data is already expired, need to check */ + jittered_sec = 0.0; + reason = "expired non-trivial data"; + } + else { + /* No need to check now, wait till next_check */ + jittered_sec = timeout; + reason = "valid non-trivial data"; + } + } + else { + timeout = map->poll_timeout; + + if (how & RSPAMD_MAP_SCHEDULE_INIT) { + timeout = 0.0; + reason = "init scheduled check"; + } + else { + if (how & RSPAMD_MAP_SCHEDULE_ERROR) { + timeout = map->poll_timeout * 
error_mult; + reason = "errored scheduled check"; + } + else if (how & RSPAMD_MAP_SCHEDULE_LOCKED) { + timeout = map->poll_timeout * lock_mult; + reason = "locked scheduled check"; + } + else { + reason = "normal scheduled check"; + } + } + + jittered_sec = rspamd_time_jitter (timeout, 0); + } + + /* Now, we do some sanity checks for jittered seconds */ + if (!(how & RSPAMD_MAP_SCHEDULE_INIT)) { + /* Never allow too low interval between timer checks, it is epxensive */ + if (jittered_sec < min_timer_interval) { + jittered_sec = rspamd_time_jitter (min_timer_interval, 0); + } + + if (map->non_trivial) { + /* + * Even if we are reported that we need to reload cache often, we + * still want to be sane in terms of events... + */ + if (jittered_sec < min_timer_interval * 2.0) { + if (map->nelts > 0) { + jittered_sec = min_timer_interval * 3.0; + } + } + } + } + + cbd = g_malloc0 (sizeof (*cbd)); + cbd->cbdata.state = 0; + cbd->cbdata.prev_data = *map->user_data; + cbd->cbdata.cur_data = NULL; + cbd->cbdata.map = map; + cbd->map = map; + map->scheduled_check = cbd; + REF_INIT_RETAIN (cbd, rspamd_map_periodic_dtor); + + cbd->ev.data = cbd; + ev_timer_init (&cbd->ev, rspamd_map_periodic_callback, jittered_sec, 0.0); + ev_timer_start (map->event_loop, &cbd->ev); + + msg_debug_map ("schedule new periodic event %p in %.3f seconds for %s; reason: %s", + cbd, jittered_sec, map->name, reason); +} + +static void +rspamd_map_dns_callback (struct rdns_reply *reply, void *arg) +{ + struct http_callback_data *cbd = arg; + struct rdns_reply_entry *cur_rep; + struct rspamd_map *map; + guint flags = RSPAMD_HTTP_CLIENT_SIMPLE|RSPAMD_HTTP_CLIENT_SHARED; + + map = cbd->map; + + msg_debug_map ("got dns reply with code %s on stage %d", + rdns_strerror (reply->code), cbd->stage); + + if (cbd->stage == http_map_terminated) { + MAP_RELEASE (cbd, "http_callback_data"); + return; + } + + if (reply->code == RDNS_RC_NOERROR) { + /* + * We just get the first address hoping that a resolver performs + 
* round-robin rotation well + */ + + DL_FOREACH (reply->entries, cur_rep) { + rspamd_inet_addr_t *addr; + addr = rspamd_inet_address_from_rnds (reply->entries); + + if (addr != NULL) { + rspamd_inet_address_set_port (addr, cbd->data->port); + g_ptr_array_add (cbd->addrs, (void *)addr); + } + } + + if (cbd->stage == http_map_resolve_host2) { + /* We have still one request pending */ + cbd->stage = http_map_resolve_host1; + } + else if (cbd->stage == http_map_resolve_host1) { + cbd->stage = http_map_http_conn; + } + } + else if (cbd->stage < http_map_http_conn) { + if (cbd->stage == http_map_resolve_host2) { + /* We have still one request pending */ + cbd->stage = http_map_resolve_host1; + } + else if (cbd->addrs->len == 0) { + /* We could not resolve host, so cowardly fail here */ + msg_err_map ("cannot resolve %s: %s", cbd->data->host, + rdns_strerror (reply->code)); + cbd->periodic->errored = 1; + rspamd_map_process_periodic (cbd->periodic); + } + else { + /* We have at least one address, so we can continue... */ + cbd->stage = http_map_http_conn; + } + } + + if (cbd->stage == http_map_http_conn && cbd->addrs->len > 0) { + guint selected_addr_idx; + + selected_addr_idx = rspamd_random_uint64_fast () % cbd->addrs->len; + cbd->addr = (rspamd_inet_addr_t *)g_ptr_array_index (cbd->addrs, + selected_addr_idx); + + msg_debug_map ("open http connection to %s", + rspamd_inet_address_to_string_pretty (cbd->addr)); + cbd->conn = rspamd_http_connection_new_client (NULL, + NULL, + http_map_error, + http_map_finish, + flags, + cbd->addr); + + if (cbd->conn != NULL) { + write_http_request (cbd); + } + else { + cbd->periodic->errored = TRUE; + msg_err_map ("error reading %s(%s): " + "connection with http server terminated incorrectly: %s", + cbd->bk->uri, + cbd->addr ? 
rspamd_inet_address_to_string_pretty (cbd->addr) : "", + strerror (errno)); + + rspamd_map_process_periodic (cbd->periodic); + } + } + + MAP_RELEASE (cbd, "http_callback_data"); +} + +static gboolean +rspamd_map_read_cached (struct rspamd_map *map, struct rspamd_map_backend *bk, + struct map_periodic_cbdata *periodic, const gchar *host) +{ + gsize len; + gpointer in; + struct http_map_data *data; + + data = bk->data.hd; + + in = rspamd_shmem_xmap (data->cache->shmem_name, PROT_READ, &len); + + if (in == NULL) { + msg_err ("cannot map cache from %s: %s", data->cache->shmem_name, + strerror (errno)); + return FALSE; + } + + if (len < data->cache->len) { + msg_err ("cannot map cache from %s: bad length %z, %z expected", + data->cache->shmem_name, + len, data->cache->len); + munmap (in, len); + + return FALSE; + } + + if (bk->is_compressed) { + ZSTD_DStream *zstream; + ZSTD_inBuffer zin; + ZSTD_outBuffer zout; + guchar *out; + gsize outlen, r; + + zstream = ZSTD_createDStream (); + ZSTD_initDStream (zstream); + + zin.pos = 0; + zin.src = in; + zin.size = len; + + if ((outlen = ZSTD_getDecompressedSize (zin.src, zin.size)) == 0) { + outlen = ZSTD_DStreamOutSize (); + } + + out = g_malloc (outlen); + + zout.dst = out; + zout.pos = 0; + zout.size = outlen; + + while (zin.pos < zin.size) { + r = ZSTD_decompressStream (zstream, &zout, &zin); + + if (ZSTD_isError (r)) { + msg_err_map ("%s: cannot decompress data: %s", + bk->uri, + ZSTD_getErrorName (r)); + ZSTD_freeDStream (zstream); + g_free (out); + munmap (in, len); + return FALSE; + } + + if (zout.pos == zout.size) { + /* We need to extend output buffer */ + zout.size = zout.size * 2 + 1; + out = g_realloc (zout.dst, zout.size); + zout.dst = out; + } + } + + ZSTD_freeDStream (zstream); + msg_info_map ("%s: read map data cached %z bytes compressed, " + "%z uncompressed", bk->uri, + len, zout.pos); + map->read_callback (out, zout.pos, &periodic->cbdata, TRUE); + g_free (out); + } + else { + msg_info_map ("%s: read map data 
cached %z bytes", bk->uri, + len); + map->read_callback (in, len, &periodic->cbdata, TRUE); + } + + munmap (in, len); + + return TRUE; +} + +static gboolean +rspamd_map_has_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk) +{ + gchar path[PATH_MAX]; + guchar digest[rspamd_cryptobox_HASHBYTES]; + struct rspamd_config *cfg = map->cfg; + struct stat st; + + if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') { + return FALSE; + } + + rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0); + rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir, + G_DIR_SEPARATOR, 20, digest); + + if (stat (path, &st) != -1 && st.st_size > + sizeof (struct rspamd_http_file_data)) { + return TRUE; + } + + return FALSE; +} + +static gboolean +rspamd_map_save_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata, + const guchar *data, + gsize len) +{ + gchar path[PATH_MAX]; + guchar digest[rspamd_cryptobox_HASHBYTES]; + struct rspamd_config *cfg = map->cfg; + gint fd; + struct rspamd_http_file_data header; + + if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') { + return FALSE; + } + + rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0); + rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir, + G_DIR_SEPARATOR, 20, digest); + + fd = rspamd_file_xopen (path, O_WRONLY | O_TRUNC | O_CREAT, + 00600, FALSE); + + if (fd == -1) { + return FALSE; + } + + if (!rspamd_file_lock (fd, FALSE)) { + msg_err_map ("cannot lock file %s: %s", path, strerror (errno)); + close (fd); + + return FALSE; + } + + memcpy (header.magic, rspamd_http_file_magic, sizeof (rspamd_http_file_magic)); + header.mtime = htdata->last_modified; + header.next_check = map->next_check; + header.data_off = sizeof (header); + + if (htdata->etag) { + header.data_off += RSPAMD_FSTRING_LEN (htdata->etag); + header.etag_len = RSPAMD_FSTRING_LEN (htdata->etag); + } 
+ else { + header.etag_len = 0; + } + + if (write (fd, &header, sizeof (header)) != sizeof (header)) { + msg_err_map ("cannot write file %s (header stage): %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + if (header.etag_len > 0) { + if (write (fd, RSPAMD_FSTRING_DATA (htdata->etag), header.etag_len) != + header.etag_len) { + msg_err_map ("cannot write file %s (etag stage): %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + } + + /* Now write the rest */ + if (write (fd, data, len) != len) { + msg_err_map ("cannot write file %s (data stage): %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + rspamd_file_unlock (fd, FALSE); + close (fd); + + msg_info_map ("saved data from %s in %s, %uz bytes", bk->uri, path, len + + sizeof (header) + header.etag_len); + + return TRUE; +} + +static gboolean +rspamd_map_update_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata) +{ + gchar path[PATH_MAX]; + guchar digest[rspamd_cryptobox_HASHBYTES]; + struct rspamd_config *cfg = map->cfg; + gint fd; + struct rspamd_http_file_data header; + + if (!rspamd_map_has_http_cached_file (map, bk)) { + return FALSE; + } + + rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0); + rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir, + G_DIR_SEPARATOR, 20, digest); + + fd = rspamd_file_xopen (path, O_WRONLY, + 00600, FALSE); + + if (fd == -1) { + return FALSE; + } + + if (!rspamd_file_lock (fd, FALSE)) { + msg_err_map ("cannot lock file %s: %s", path, strerror (errno)); + close (fd); + + return FALSE; + } + + memcpy (header.magic, rspamd_http_file_magic, sizeof (rspamd_http_file_magic)); + header.mtime = htdata->last_modified; + header.next_check = map->next_check; + header.data_off = sizeof (header); + + if (htdata->etag) { + header.data_off += 
RSPAMD_FSTRING_LEN (htdata->etag); + header.etag_len = RSPAMD_FSTRING_LEN (htdata->etag); + } + else { + header.etag_len = 0; + } + + if (write (fd, &header, sizeof (header)) != sizeof (header)) { + msg_err_map ("cannot update file %s (header stage): %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + if (header.etag_len > 0) { + if (write (fd, RSPAMD_FSTRING_DATA (htdata->etag), header.etag_len) != + header.etag_len) { + msg_err_map ("cannot update file %s (etag stage): %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + } + + rspamd_file_unlock (fd, FALSE); + close (fd); + + return TRUE; +} + + +static gboolean +rspamd_map_read_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata, + struct map_cb_data *cbdata) +{ + gchar path[PATH_MAX]; + guchar digest[rspamd_cryptobox_HASHBYTES]; + struct rspamd_config *cfg = map->cfg; + gint fd; + struct stat st; + struct rspamd_http_file_data header; + + if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') { + return FALSE; + } + + rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0); + rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir, + G_DIR_SEPARATOR, 20, digest); + + fd = rspamd_file_xopen (path, O_RDONLY, 00600, FALSE); + + if (fd == -1) { + return FALSE; + } + + if (!rspamd_file_lock (fd, FALSE)) { + msg_err_map ("cannot lock file %s: %s", path, strerror (errno)); + close (fd); + + return FALSE; + } + + (void)fstat (fd, &st); + + if (read (fd, &header, sizeof (header)) != sizeof (header)) { + msg_err_map ("cannot read file %s (header stage): %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + if (memcmp (header.magic, rspamd_http_file_magic, + sizeof (rspamd_http_file_magic)) != 0) { + msg_warn_map ("invalid or old version magic in file %s; ignore it", path); + 
rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + map->next_check = header.next_check; + htdata->last_modified = header.mtime; + + if (header.etag_len > 0) { + rspamd_fstring_t *etag = rspamd_fstring_sized_new (header.etag_len); + + if (read (fd, RSPAMD_FSTRING_DATA (etag), header.etag_len) != header.etag_len) { + msg_err_map ("cannot read file %s (etag stage): %s", path, + strerror (errno)); + rspamd_file_unlock (fd, FALSE); + rspamd_fstring_free (etag); + close (fd); + + return FALSE; + } + + etag->len = header.etag_len; + + if (htdata->etag) { + /* FIXME: should be dealt somehow better */ + msg_warn_map ("etag is already defined as %V; cached is %V; ignore cached", + htdata->etag, etag); + rspamd_fstring_free (etag); + } + else { + htdata->etag = etag; + } + } + + rspamd_file_unlock (fd, FALSE); + close (fd); + + /* Now read file data */ + /* Perform buffered read: fail-safe */ + if (!read_map_file_chunks (map, cbdata, path, + st.st_size - header.data_off, header.data_off)) { + return FALSE; + } + + struct tm tm; + gchar ncheck_buf[32], lm_buf[32]; + + rspamd_localtime (map->next_check, &tm); + strftime (ncheck_buf, sizeof (ncheck_buf) - 1, "%Y-%m-%d %H:%M:%S", &tm); + rspamd_localtime (htdata->last_modified, &tm); + strftime (lm_buf, sizeof (lm_buf) - 1, "%Y-%m-%d %H:%M:%S", &tm); + + msg_info_map ("read cached data for %s from %s, %uz bytes; next check at: %s;" + " last modified on: %s; etag: %V", + bk->uri, + path, + (size_t)(st.st_size - header.data_off), + ncheck_buf, + lm_buf, + htdata->etag); + + return TRUE; +} + +/** + * Async HTTP callback + */ +static void +rspamd_map_common_http_callback (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct map_periodic_cbdata *periodic, + gboolean check) +{ + struct http_map_data *data; + struct http_callback_data *cbd; + guint flags = RSPAMD_HTTP_CLIENT_SIMPLE|RSPAMD_HTTP_CLIENT_SHARED; + + data = bk->data.hd; + + if (g_atomic_int_get (&data->cache->available) == 1) { + /* Read 
cached data */ + if (check) { + if (data->last_modified < data->cache->last_modified) { + periodic->need_modify = TRUE; + /* Reset the whole chain */ + periodic->cur_backend = 0; + rspamd_map_process_periodic (periodic); + } + else { + if (map->active_http) { + /* Check even if there is a cached version */ + goto check; + } + else { + /* Switch to the next backend */ + periodic->cur_backend++; + rspamd_map_process_periodic (periodic); + } + } + + return; + } + else { + if (map->active_http && + data->last_modified > data->cache->last_modified) { + goto check; + } + else if (rspamd_map_read_cached (map, bk, periodic, data->host)) { + /* Switch to the next backend */ + periodic->cur_backend++; + data->last_modified = data->cache->last_modified; + rspamd_map_process_periodic (periodic); + + return; + } + } + } + else if (!map->active_http) { + /* Switch to the next backend */ + periodic->cur_backend ++; + rspamd_map_process_periodic (periodic); + + return; + } + +check: + cbd = g_malloc0 (sizeof (struct http_callback_data)); + + cbd->event_loop = map->event_loop; + cbd->addrs = g_ptr_array_sized_new (4); + cbd->map = map; + cbd->data = data; + cbd->check = check; + cbd->periodic = periodic; + MAP_RETAIN (periodic, "periodic"); + cbd->bk = bk; + MAP_RETAIN (bk, "rspamd_map_backend"); + cbd->stage = http_map_terminated; + REF_INIT_RETAIN (cbd, free_http_cbdata); + + msg_debug_map ("%s map data from %s", check ? 
"checking" : "reading", + data->host); + + /* Try address */ + rspamd_inet_addr_t *addr = NULL; + + if (rspamd_parse_inet_address (&addr, data->host, + strlen (data->host), RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) { + rspamd_inet_address_set_port (addr, cbd->data->port); + g_ptr_array_add (cbd->addrs, (void *)addr); + cbd->conn = rspamd_http_connection_new_client ( + NULL, + NULL, + http_map_error, + http_map_finish, + flags, + addr); + + if (cbd->conn != NULL) { + cbd->stage = http_map_http_conn; + write_http_request (cbd); + cbd->addr = addr; + MAP_RELEASE (cbd, "http_callback_data"); + } + else { + msg_warn_map ("cannot load map: cannot connect to %s: %s", + data->host, strerror (errno)); + MAP_RELEASE (cbd, "http_callback_data"); + } + + return; + } + else if (map->r->r) { + /* Send both A and AAAA requests */ + guint nreq = 0; + + if (rdns_make_request_full (map->r->r, rspamd_map_dns_callback, cbd, + map->cfg->dns_timeout, map->cfg->dns_retransmits, 1, + data->host, RDNS_REQUEST_A)) { + MAP_RETAIN (cbd, "http_callback_data"); + nreq ++; + } + if (rdns_make_request_full (map->r->r, rspamd_map_dns_callback, cbd, + map->cfg->dns_timeout, map->cfg->dns_retransmits, 1, + data->host, RDNS_REQUEST_AAAA)) { + MAP_RETAIN (cbd, "http_callback_data"); + nreq ++; + } + + if (nreq == 2) { + cbd->stage = http_map_resolve_host2; + } + else if (nreq == 1) { + cbd->stage = http_map_resolve_host1; + } + + map->tmp_dtor = free_http_cbdata_dtor; + map->tmp_dtor_data = cbd; + } + else { + msg_warn_map ("cannot load map: DNS resolver is not initialized"); + cbd->periodic->errored = TRUE; + } + + MAP_RELEASE (cbd, "http_callback_data"); +} + +static void +rspamd_map_http_check_callback (struct map_periodic_cbdata *cbd) +{ + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + map = cbd->map; + bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend); + + rspamd_map_common_http_callback (map, bk, cbd, TRUE); +} + +static void +rspamd_map_http_read_callback (struct 
map_periodic_cbdata *cbd) +{ + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + map = cbd->map; + bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend); + rspamd_map_common_http_callback (map, bk, cbd, FALSE); +} + +static void +rspamd_map_file_check_callback (struct map_periodic_cbdata *periodic) +{ + struct rspamd_map *map; + struct file_map_data *data; + struct rspamd_map_backend *bk; + + map = periodic->map; + bk = g_ptr_array_index (map->backends, periodic->cur_backend); + data = bk->data.fd; + + if (data->need_modify) { + periodic->need_modify = TRUE; + periodic->cur_backend = 0; + data->need_modify = FALSE; + + rspamd_map_process_periodic (periodic); + + return; + } + + map = periodic->map; + /* Switch to the next backend as the rest is handled by ev_stat */ + periodic->cur_backend ++; + rspamd_map_process_periodic (periodic); +} + +static void +rspamd_map_static_check_callback (struct map_periodic_cbdata *periodic) +{ + struct rspamd_map *map; + struct static_map_data *data; + struct rspamd_map_backend *bk; + + map = periodic->map; + bk = g_ptr_array_index (map->backends, periodic->cur_backend); + data = bk->data.sd; + + if (!data->processed) { + periodic->need_modify = TRUE; + periodic->cur_backend = 0; + + rspamd_map_process_periodic (periodic); + + return; + } + + /* Switch to the next backend */ + periodic->cur_backend ++; + rspamd_map_process_periodic (periodic); +} + +static void +rspamd_map_file_read_callback (struct map_periodic_cbdata *periodic) +{ + struct rspamd_map *map; + struct file_map_data *data; + struct rspamd_map_backend *bk; + + map = periodic->map; + + bk = g_ptr_array_index (map->backends, periodic->cur_backend); + data = bk->data.fd; + + msg_info_map ("rereading map file %s", data->filename); + + if (!read_map_file (map, data, bk, periodic)) { + periodic->errored = TRUE; + } + + /* Switch to the next backend */ + periodic->cur_backend ++; + rspamd_map_process_periodic (periodic); +} + +static void 
+rspamd_map_static_read_callback (struct map_periodic_cbdata *periodic) +{ + struct rspamd_map *map; + struct static_map_data *data; + struct rspamd_map_backend *bk; + + map = periodic->map; + + bk = g_ptr_array_index (map->backends, periodic->cur_backend); + data = bk->data.sd; + + msg_info_map ("rereading static map"); + + if (!read_map_static (map, data, bk, periodic)) { + periodic->errored = TRUE; + } + + /* Switch to the next backend */ + periodic->cur_backend ++; + rspamd_map_process_periodic (periodic); +} + +static void +rspamd_map_process_periodic (struct map_periodic_cbdata *cbd) +{ + struct rspamd_map_backend *bk; + struct rspamd_map *map; + + map = cbd->map; + map->scheduled_check = NULL; + + if (!map->file_only && !cbd->locked) { + if (!g_atomic_int_compare_and_exchange (cbd->map->locked, + 0, 1)) { + msg_debug_map ( + "don't try to reread map %s as it is locked by other process, " + "will reread it later", cbd->map->name); + rspamd_map_schedule_periodic (map, RSPAMD_MAP_SCHEDULE_LOCKED); + MAP_RELEASE (cbd, "periodic"); + + return; + } + else { + msg_debug_map ("locked map %s", cbd->map->name); + cbd->locked = TRUE; + } + } + + if (cbd->errored) { + /* We should not check other backends if some backend has failed */ + rspamd_map_schedule_periodic (cbd->map, RSPAMD_MAP_SCHEDULE_ERROR); + + if (cbd->locked) { + g_atomic_int_set (cbd->map->locked, 0); + cbd->locked = FALSE; + } + + msg_debug_map ("unlocked map %s, refcount=%d", cbd->map->name, + cbd->ref.refcount); + MAP_RELEASE (cbd, "periodic"); + + return; + } + + /* For each backend we need to check for modifications */ + if (cbd->cur_backend >= cbd->map->backends->len) { + /* Last backend */ + msg_debug_map ("finished map: %d of %d", cbd->cur_backend, + cbd->map->backends->len); + MAP_RELEASE (cbd, "periodic"); + + return; + } + + if (cbd->map->wrk && cbd->map->wrk->state == rspamd_worker_state_running) { + bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend); + g_assert (bk != NULL); + + 
if (cbd->need_modify) { + /* Load data from the next backend */ + switch (bk->protocol) { + case MAP_PROTO_HTTP: + case MAP_PROTO_HTTPS: + rspamd_map_http_read_callback (cbd); + break; + case MAP_PROTO_FILE: + rspamd_map_file_read_callback (cbd); + break; + case MAP_PROTO_STATIC: + rspamd_map_static_read_callback (cbd); + break; + } + } else { + /* Check the next backend */ + switch (bk->protocol) { + case MAP_PROTO_HTTP: + case MAP_PROTO_HTTPS: + rspamd_map_http_check_callback (cbd); + break; + case MAP_PROTO_FILE: + rspamd_map_file_check_callback (cbd); + break; + case MAP_PROTO_STATIC: + rspamd_map_static_check_callback (cbd); + break; + } + } + } +} + +static void +rspamd_map_on_stat (struct ev_loop *loop, ev_stat *w, int revents) +{ + struct rspamd_map *map = (struct rspamd_map *)w->data; + + if (w->attr.st_nlink > 0) { + + if (w->attr.st_mtime > w->prev.st_mtime) { + msg_info_map ("old mtime is %t (size = %Hz), " + "new mtime is %t (size = %Hz) for map file %s", + w->prev.st_mtime, (gsize)w->prev.st_size, + w->attr.st_mtime, (gsize)w->attr.st_size, + w->path); + + /* Fire need modify flag */ + struct rspamd_map_backend *bk; + guint i; + + PTR_ARRAY_FOREACH (map->backends, i, bk) { + if (bk->protocol == MAP_PROTO_FILE) { + bk->data.fd->need_modify = TRUE; + } + } + + map->next_check = 0; + + if (map->scheduled_check) { + ev_timer_stop (map->event_loop, &map->scheduled_check->ev); + MAP_RELEASE (map->scheduled_check, "rspamd_map_on_stat"); + map->scheduled_check = NULL; + } + + rspamd_map_schedule_periodic (map, RSPAMD_MAP_SCHEDULE_INIT); + } + } +} + +/* Start watching event for all maps */ +void +rspamd_map_watch (struct rspamd_config *cfg, + struct ev_loop *event_loop, + struct rspamd_dns_resolver *resolver, + struct rspamd_worker *worker, + enum rspamd_map_watch_type how) +{ + GList *cur = cfg->maps; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + guint i; + + g_assert (how > RSPAMD_MAP_WATCH_MIN && how < RSPAMD_MAP_WATCH_MAX); + + /* First of 
all do synced read of data */ + while (cur) { + map = cur->data; + map->event_loop = event_loop; + map->r = resolver; + + if (map->wrk == NULL && how != RSPAMD_MAP_WATCH_WORKER) { + /* Generic scanner map */ + map->wrk = worker; + + if (how == RSPAMD_MAP_WATCH_PRIMARY_CONTROLLER) { + map->active_http = TRUE; + } + else { + map->active_http = FALSE; + } + } + else if (map->wrk != NULL && map->wrk == worker) { + /* Map is bound to a specific worker */ + map->active_http = TRUE; + } + else { + /* Skip map for this worker as irrelevant */ + cur = g_list_next (cur); + continue; + } + + if (!map->active_http) { + /* Check cached version more frequently as it is cheap */ + + if (map->poll_timeout >= cfg->map_timeout && + cfg->map_file_watch_multiplier < 1.0) { + map->poll_timeout = + map->poll_timeout * cfg->map_file_watch_multiplier; + } + } + + map->file_only = TRUE; + map->static_only = TRUE; + + PTR_ARRAY_FOREACH (map->backends, i, bk) { + bk->event_loop = event_loop; + + if (bk->protocol == MAP_PROTO_FILE) { + struct file_map_data *data; + + data = bk->data.fd; + + ev_stat_init (&data->st_ev, rspamd_map_on_stat, + data->filename, map->poll_timeout * cfg->map_file_watch_multiplier); + data->st_ev.data = map; + ev_stat_start (event_loop, &data->st_ev); + map->static_only = FALSE; + } + else if ((bk->protocol == MAP_PROTO_HTTP || + bk->protocol == MAP_PROTO_HTTPS)) { + if (map->active_http) { + map->non_trivial = TRUE; + } + + map->static_only = FALSE; + map->file_only = FALSE; + } + } + + rspamd_map_schedule_periodic (map, RSPAMD_MAP_SCHEDULE_INIT); + + cur = g_list_next (cur); + } +} + +void +rspamd_map_preload (struct rspamd_config *cfg) +{ + GList *cur = cfg->maps; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + guint i; + gboolean map_ok; + + /* First of all do synced read of data */ + while (cur) { + map = cur->data; + map_ok = TRUE; + + PTR_ARRAY_FOREACH (map->backends, i, bk) { + if (!(bk->protocol == MAP_PROTO_FILE || + bk->protocol == 
MAP_PROTO_STATIC)) { + + if (bk->protocol == MAP_PROTO_HTTP || + bk->protocol == MAP_PROTO_HTTPS) { + if (!rspamd_map_has_http_cached_file (map, bk)) { + + if (!map->fallback_backend) { + map_ok = FALSE; + } + break; + } + else { + continue; /* We are yet fine */ + } + } + map_ok = FALSE; + break; + } + } + + if (map_ok) { + struct map_periodic_cbdata fake_cbd; + gboolean succeed = TRUE; + + memset (&fake_cbd, 0, sizeof (fake_cbd)); + fake_cbd.cbdata.state = 0; + fake_cbd.cbdata.prev_data = *map->user_data; + fake_cbd.cbdata.cur_data = NULL; + fake_cbd.cbdata.map = map; + fake_cbd.map = map; + + PTR_ARRAY_FOREACH (map->backends, i, bk) { + fake_cbd.cur_backend = i; + + if (bk->protocol == MAP_PROTO_FILE) { + if (!read_map_file (map, bk->data.fd, bk, &fake_cbd)) { + succeed = FALSE; + break; + } + } + else if (bk->protocol == MAP_PROTO_STATIC) { + if (!read_map_static (map, bk->data.sd, bk, &fake_cbd)) { + succeed = FALSE; + break; + } + } + else if (bk->protocol == MAP_PROTO_HTTP || + bk->protocol == MAP_PROTO_HTTPS) { + if (!rspamd_map_read_http_cached_file (map, bk, bk->data.hd, + &fake_cbd.cbdata)) { + + if (map->fallback_backend) { + /* Try fallback */ + g_assert (map->fallback_backend->protocol == + MAP_PROTO_FILE); + if (!read_map_file (map, + map->fallback_backend->data.fd, + map->fallback_backend, &fake_cbd)) { + succeed = FALSE; + break; + } + } + else { + succeed = FALSE; + break; + } + } + } + else { + g_assert_not_reached (); + } + } + + if (succeed) { + map->fin_callback (&fake_cbd.cbdata, map->user_data); + } + else { + msg_info_map ("preload of %s failed", map->name); + } + + } + + cur = g_list_next (cur); + } +} + +void +rspamd_map_remove_all (struct rspamd_config *cfg) +{ + struct rspamd_map *map; + GList *cur; + struct rspamd_map_backend *bk; + struct map_cb_data cbdata; + guint i; + + for (cur = cfg->maps; cur != NULL; cur = g_list_next (cur)) { + map = cur->data; + + if (map->tmp_dtor) { + map->tmp_dtor (map->tmp_dtor_data); + } + + if 
(map->dtor) { + cbdata.prev_data = NULL; + cbdata.map = map; + cbdata.cur_data = *map->user_data; + + map->dtor (&cbdata); + *map->user_data = NULL; + } + + for (i = 0; i < map->backends->len; i ++) { + bk = g_ptr_array_index (map->backends, i); + + MAP_RELEASE (bk, "rspamd_map_backend"); + } + + if (map->fallback_backend) { + MAP_RELEASE (map->fallback_backend, "rspamd_map_backend"); + } + } + + g_list_free (cfg->maps); + cfg->maps = NULL; +} + +static const gchar * +rspamd_map_check_proto (struct rspamd_config *cfg, + const gchar *map_line, struct rspamd_map_backend *bk) +{ + const gchar *pos = map_line, *end, *end_key; + + g_assert (bk != NULL); + g_assert (pos != NULL); + + end = pos + strlen (pos); + + /* Static check */ + if (g_ascii_strcasecmp (pos, "static") == 0) { + bk->protocol = MAP_PROTO_STATIC; + bk->uri = g_strdup (pos); + + return pos; + } + else if (g_ascii_strcasecmp (pos, "zst+static") == 0) { + bk->protocol = MAP_PROTO_STATIC; + bk->uri = g_strdup (pos + 4); + bk->is_compressed = TRUE; + + return pos + 4; + } + + for (;;) { + if (g_ascii_strncasecmp (pos, "sign+", sizeof ("sign+") - 1) == 0) { + bk->is_signed = TRUE; + pos += sizeof ("sign+") - 1; + } + else if (g_ascii_strncasecmp (pos, "fallback+", sizeof ("fallback+") - 1) == 0) { + bk->is_fallback = TRUE; + pos += sizeof ("fallback+") - 1; + } + else if (g_ascii_strncasecmp (pos, "key=", sizeof ("key=") - 1) == 0) { + pos += sizeof ("key=") - 1; + end_key = memchr (pos, '+', end - pos); + + if (end_key != NULL) { + bk->trusted_pubkey = rspamd_pubkey_from_base32 (pos, end_key - pos, + RSPAMD_KEYPAIR_SIGN, RSPAMD_CRYPTOBOX_MODE_25519); + + if (bk->trusted_pubkey == NULL) { + msg_err_config ("cannot read pubkey from map: %s", + map_line); + return NULL; + } + pos = end_key + 1; + } else if (end - pos > 64) { + /* Try hex encoding */ + bk->trusted_pubkey = rspamd_pubkey_from_hex (pos, 64, + RSPAMD_KEYPAIR_SIGN, RSPAMD_CRYPTOBOX_MODE_25519); + + if (bk->trusted_pubkey == NULL) { + msg_err_config 
("cannot read pubkey from map: %s", + map_line); + return NULL; + } + pos += 64; + } else { + msg_err_config ("cannot read pubkey from map: %s", + map_line); + return NULL; + } + + if (*pos == '+' || *pos == ':') { + pos++; + } + } + else { + /* No known flags */ + break; + } + } + + bk->protocol = MAP_PROTO_FILE; + + if (g_ascii_strncasecmp (pos, "http://", sizeof ("http://") - 1) == 0) { + bk->protocol = MAP_PROTO_HTTP; + /* Include http:// */ + bk->uri = g_strdup (pos); + pos += sizeof ("http://") - 1; + } + else if (g_ascii_strncasecmp (pos, "https://", sizeof ("https://") - 1) == 0) { + bk->protocol = MAP_PROTO_HTTPS; + /* Include https:// */ + bk->uri = g_strdup (pos); + pos += sizeof ("https://") - 1; + } + else if (g_ascii_strncasecmp (pos, "file://", sizeof ("file://") - 1) == 0) { + pos += sizeof ("file://") - 1; + /* Exclude file:// */ + bk->uri = g_strdup (pos); + } + else if (*pos == '/') { + /* Trivial file case */ + bk->uri = g_strdup (pos); + } + else { + msg_err_config ("invalid map fetching protocol: %s", map_line); + + return NULL; + } + + if (bk->protocol != MAP_PROTO_FILE && bk->is_signed) { + msg_err_config ("signed maps are no longer supported for HTTP(s): %s", map_line); + } + + return pos; +} + +gboolean +rspamd_map_is_map (const gchar *map_line) +{ + gboolean ret = FALSE; + + g_assert (map_line != NULL); + + if (map_line[0] == '/') { + ret = TRUE; + } + else if (g_ascii_strncasecmp (map_line, "sign+", sizeof ("sign+") - 1) == 0) { + ret = TRUE; + } + else if (g_ascii_strncasecmp (map_line, "fallback+", sizeof ("fallback+") - 1) == 0) { + ret = TRUE; + } + else if (g_ascii_strncasecmp (map_line, "file://", sizeof ("file://") - 1) == 0) { + ret = TRUE; + } + else if (g_ascii_strncasecmp (map_line, "http://", sizeof ("http://") - 1) == 0) { + ret = TRUE; + } + else if (g_ascii_strncasecmp (map_line, "https://", sizeof ("https://") - 1) == 0) { + ret = TRUE; + } + + return ret; +} + +static void +rspamd_map_backend_dtor (struct 
rspamd_map_backend *bk) +{ + g_free (bk->uri); + + switch (bk->protocol) { + case MAP_PROTO_FILE: + if (bk->data.fd) { + ev_stat_stop (bk->event_loop, &bk->data.fd->st_ev); + g_free (bk->data.fd->filename); + g_free (bk->data.fd); + } + break; + case MAP_PROTO_STATIC: + if (bk->data.sd) { + if (bk->data.sd->data) { + g_free (bk->data.sd->data); + } + + g_free (bk->data.sd); + } + break; + case MAP_PROTO_HTTP: + case MAP_PROTO_HTTPS: + if (bk->data.hd) { + struct http_map_data *data = bk->data.hd; + + g_free (data->host); + g_free (data->path); + g_free (data->rest); + + if (data->userinfo) { + g_free (data->userinfo); + } + + if (data->etag) { + rspamd_fstring_free (data->etag); + } + + if (g_atomic_int_compare_and_exchange (&data->cache->available, 1, 0)) { + if (data->cur_cache_cbd) { + MAP_RELEASE (data->cur_cache_cbd->shm, + "rspamd_http_map_cached_cbdata"); + ev_timer_stop (data->cur_cache_cbd->event_loop, + &data->cur_cache_cbd->timeout); + g_free (data->cur_cache_cbd); + data->cur_cache_cbd = NULL; + } + + unlink (data->cache->shmem_name); + } + + g_free (bk->data.hd); + } + break; + } + + if (bk->trusted_pubkey) { + rspamd_pubkey_unref (bk->trusted_pubkey); + } + + g_free (bk); +} + +static struct rspamd_map_backend * +rspamd_map_parse_backend (struct rspamd_config *cfg, const gchar *map_line) +{ + struct rspamd_map_backend *bk; + struct file_map_data *fdata = NULL; + struct http_map_data *hdata = NULL; + struct static_map_data *sdata = NULL; + struct http_parser_url up; + const gchar *end, *p; + rspamd_ftok_t tok; + + bk = g_malloc0 (sizeof (*bk)); + REF_INIT_RETAIN (bk, rspamd_map_backend_dtor); + + if (!rspamd_map_check_proto (cfg, map_line, bk)) { + goto err; + } + + if (bk->is_fallback && bk->protocol != MAP_PROTO_FILE) { + msg_err_config ("fallback backend must be file for %s", bk->uri); + + goto err; + } + + end = map_line + strlen (map_line); + if (end - map_line > 5) { + p = end - 5; + if (g_ascii_strcasecmp (p, ".zstd") == 0) { + bk->is_compressed 
= TRUE; + } + p = end - 4; + if (g_ascii_strcasecmp (p, ".zst") == 0) { + bk->is_compressed = TRUE; + } + } + + /* Now check for each proto separately */ + if (bk->protocol == MAP_PROTO_FILE) { + fdata = g_malloc0 (sizeof (struct file_map_data)); + + if (access (bk->uri, R_OK) == -1) { + if (errno != ENOENT) { + msg_err_config ("cannot open file '%s': %s", bk->uri, strerror (errno)); + goto err; + } + + msg_info_config ( + "map '%s' is not found, but it can be loaded automatically later", + bk->uri); + } + + fdata->filename = g_strdup (bk->uri); + bk->data.fd = fdata; + } + else if (bk->protocol == MAP_PROTO_HTTP || bk->protocol == MAP_PROTO_HTTPS) { + hdata = g_malloc0 (sizeof (struct http_map_data)); + + memset (&up, 0, sizeof (up)); + if (http_parser_parse_url (bk->uri, strlen (bk->uri), FALSE, + &up) != 0) { + msg_err_config ("cannot parse HTTP url: %s", bk->uri); + goto err; + } + else { + if (!(up.field_set & 1u << UF_HOST)) { + msg_err_config ("cannot parse HTTP url: %s: no host", bk->uri); + goto err; + } + + tok.begin = bk->uri + up.field_data[UF_HOST].off; + tok.len = up.field_data[UF_HOST].len; + hdata->host = rspamd_ftokdup (&tok); + + if (up.field_set & (1u << UF_PORT)) { + hdata->port = up.port; + } + else { + if (bk->protocol == MAP_PROTO_HTTP) { + hdata->port = 80; + } + else { + hdata->port = 443; + } + } + + if (up.field_set & (1u << UF_PATH)) { + tok.begin = bk->uri + up.field_data[UF_PATH].off; + tok.len = up.field_data[UF_PATH].len; + + hdata->path = rspamd_ftokdup (&tok); + + /* We also need to check query + fragment */ + if (up.field_set & ((1u << UF_QUERY) | (1u << UF_FRAGMENT))) { + tok.begin = bk->uri + up.field_data[UF_PATH].off + + up.field_data[UF_PATH].len; + tok.len = strlen (tok.begin); + hdata->rest = rspamd_ftokdup (&tok); + } + else { + hdata->rest = g_strdup (""); + } + } + + if (up.field_set & (1u << UF_USERINFO)) { + /* Create authorisation header for basic auth */ + guint len = sizeof ("Basic ") + + 
up.field_data[UF_USERINFO].len * 8 / 5 + 4; + hdata->userinfo = g_malloc (len); + rspamd_snprintf (hdata->userinfo, len, "Basic %*Bs", + (int)up.field_data[UF_USERINFO].len, + bk->uri + up.field_data[UF_USERINFO].off); + } + } + + hdata->cache = rspamd_mempool_alloc0_shared (cfg->cfg_pool, + sizeof (*hdata->cache)); + + bk->data.hd = hdata; + } + else if (bk->protocol == MAP_PROTO_STATIC) { + sdata = g_malloc0 (sizeof (*sdata)); + bk->data.sd = sdata; + } + + bk->id = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_T1HA, + bk->uri, strlen (bk->uri), 0xdeadbabe); + + return bk; + +err: + MAP_RELEASE (bk, "rspamd_map_backend"); + + if (hdata) { + g_free (hdata); + } + + return NULL; +} + +static void +rspamd_map_calculate_hash (struct rspamd_map *map) +{ + struct rspamd_map_backend *bk; + guint i; + rspamd_cryptobox_hash_state_t st; + gchar *cksum_encoded, cksum[rspamd_cryptobox_HASHBYTES]; + + rspamd_cryptobox_hash_init (&st, NULL, 0); + + for (i = 0; i < map->backends->len; i ++) { + bk = g_ptr_array_index (map->backends, i); + rspamd_cryptobox_hash_update (&st, bk->uri, strlen (bk->uri)); + } + + rspamd_cryptobox_hash_final (&st, cksum); + cksum_encoded = rspamd_encode_base32 (cksum, sizeof (cksum)); + rspamd_strlcpy (map->tag, cksum_encoded, sizeof (map->tag)); + g_free (cksum_encoded); +} + +static gboolean +rspamd_map_add_static_string (struct rspamd_config *cfg, + const ucl_object_t *elt, + GString *target) +{ + gsize sz; + const gchar *dline; + + if (ucl_object_type (elt) != UCL_STRING) { + msg_err_config ("map has static backend but `data` is " + "not string like: %s", + ucl_object_type_to_string (elt->type)); + return FALSE; + } + + /* Otherwise, we copy data to the backend */ + dline = ucl_object_tolstring (elt, &sz); + + if (sz == 0) { + msg_err_config ("map has static backend but empty no data"); + return FALSE; + } + + g_string_append_len (target, dline, sz); + g_string_append_c (target, '\n'); + + return TRUE; +} + +struct rspamd_map * 
+rspamd_map_add (struct rspamd_config *cfg, + const gchar *map_line, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + map_dtor_t dtor, + void **user_data, + struct rspamd_worker *worker) +{ + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + bk = rspamd_map_parse_backend (cfg, map_line); + if (bk == NULL) { + return NULL; + } + + if (bk->is_fallback) { + msg_err_config ("cannot add map with fallback only backend: %s", bk->uri); + REF_RELEASE (bk); + + return NULL; + } + + map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_map)); + map->read_callback = read_callback; + map->fin_callback = fin_callback; + map->dtor = dtor; + map->user_data = user_data; + map->cfg = cfg; + map->id = rspamd_random_uint64_fast (); + map->locked = + rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); + map->backends = g_ptr_array_sized_new (1); + map->wrk = worker; + rspamd_mempool_add_destructor (cfg->cfg_pool, rspamd_ptr_array_free_hard, + map->backends); + g_ptr_array_add (map->backends, bk); + map->name = rspamd_mempool_strdup (cfg->cfg_pool, map_line); + + if (bk->protocol == MAP_PROTO_FILE) { + map->poll_timeout = (cfg->map_timeout * cfg->map_file_watch_multiplier); + } else { + map->poll_timeout = cfg->map_timeout; + } + + if (description != NULL) { + map->description = rspamd_mempool_strdup (cfg->cfg_pool, description); + } + + rspamd_map_calculate_hash (map); + msg_info_map ("added map %s", bk->uri); + + cfg->maps = g_list_prepend (cfg->maps, map); + + return map; +} + +static inline void +rspamd_map_add_backend (struct rspamd_map *map, struct rspamd_map_backend *bk) +{ + if (bk->is_fallback) { + if (map->fallback_backend) { + msg_warn_map ("redefining fallback backend from %s to %s", + map->fallback_backend->uri, bk->uri); + } + + map->fallback_backend = bk; + } + else { + g_ptr_array_add (map->backends, bk); + } +} + +struct rspamd_map* +rspamd_map_add_from_ucl (struct rspamd_config *cfg, + const 
ucl_object_t *obj, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + map_dtor_t dtor, + void **user_data, + struct rspamd_worker *worker) +{ + ucl_object_iter_t it = NULL; + const ucl_object_t *cur, *elt; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + guint i; + + g_assert (obj != NULL); + + if (ucl_object_type (obj) == UCL_STRING) { + /* Just a plain string */ + return rspamd_map_add (cfg, ucl_object_tostring (obj), description, + read_callback, fin_callback, dtor, user_data, worker); + } + + map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_map)); + map->read_callback = read_callback; + map->fin_callback = fin_callback; + map->dtor = dtor; + map->user_data = user_data; + map->cfg = cfg; + map->id = rspamd_random_uint64_fast (); + map->locked = + rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); + map->backends = g_ptr_array_new (); + map->wrk = worker; + rspamd_mempool_add_destructor (cfg->cfg_pool, rspamd_ptr_array_free_hard, + map->backends); + map->poll_timeout = cfg->map_timeout; + + if (description) { + map->description = rspamd_mempool_strdup (cfg->cfg_pool, description); + } + + if (ucl_object_type (obj) == UCL_ARRAY) { + /* Add array of maps as multiple backends */ + while ((cur = ucl_object_iterate (obj, &it, true)) != NULL) { + if (ucl_object_type (cur) == UCL_STRING) { + bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (cur)); + + if (bk != NULL) { + rspamd_map_add_backend (map, bk); + + if (!map->name) { + map->name = rspamd_mempool_strdup (cfg->cfg_pool, + ucl_object_tostring (cur)); + } + } + } + else { + msg_err_config ("bad map element type: %s", + ucl_object_type_to_string (ucl_object_type (cur))); + } + } + + if (map->backends->len == 0) { + msg_err_config ("map has no urls to be loaded: empty list"); + goto err; + } + } + else if (ucl_object_type (obj) == UCL_OBJECT) { + elt = ucl_object_lookup (obj, "name"); + if (elt && ucl_object_type (elt) == UCL_STRING) { + 
map->name = rspamd_mempool_strdup (cfg->cfg_pool, + ucl_object_tostring (elt)); + } + + elt = ucl_object_lookup (obj, "description"); + if (elt && ucl_object_type (elt) == UCL_STRING) { + map->description = rspamd_mempool_strdup (cfg->cfg_pool, + ucl_object_tostring (elt)); + } + + elt = ucl_object_lookup_any (obj, "timeout", "poll", "poll_time", + "watch_interval", NULL); + if (elt) { + map->poll_timeout = ucl_object_todouble (elt); + } + + elt = ucl_object_lookup_any (obj, "upstreams", "url", "urls", NULL); + if (elt == NULL) { + msg_err_config ("map has no urls to be loaded: no elt"); + goto err; + } + + if (ucl_object_type (elt) == UCL_ARRAY) { + /* Add array of maps as multiple backends */ + it = ucl_object_iterate_new (elt); + + while ((cur = ucl_object_iterate_safe (it, true)) != NULL) { + if (ucl_object_type (cur) == UCL_STRING) { + bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (cur)); + + if (bk != NULL) { + rspamd_map_add_backend (map, bk); + + if (!map->name) { + map->name = rspamd_mempool_strdup (cfg->cfg_pool, + ucl_object_tostring (cur)); + } + } + } + else { + msg_err_config ("bad map element type: %s", + ucl_object_type_to_string (ucl_object_type (cur))); + ucl_object_iterate_free (it); + goto err; + } + } + + ucl_object_iterate_free (it); + + if (map->backends->len == 0) { + msg_err_config ("map has no urls to be loaded: empty object list"); + goto err; + } + } + else if (ucl_object_type (elt) == UCL_STRING) { + bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (elt)); + + if (bk != NULL) { + rspamd_map_add_backend (map, bk); + + if (!map->name) { + map->name = rspamd_mempool_strdup (cfg->cfg_pool, + ucl_object_tostring (elt)); + } + } + } + + if (!map->backends || map->backends->len == 0) { + msg_err_config ("map has no urls to be loaded: no valid backends"); + goto err; + } + } + else { + msg_err_config ("map has invalid type for value: %s", + ucl_object_type_to_string (ucl_object_type (obj))); + goto err; + } + + gboolean 
all_local = TRUE; + + PTR_ARRAY_FOREACH (map->backends, i, bk) { + if (bk->protocol == MAP_PROTO_STATIC) { + GString *map_data; + /* We need data field in ucl */ + elt = ucl_object_lookup (obj, "data"); + + if (elt == NULL) { + msg_err_config ("map has static backend but no `data` field"); + goto err; + } + + + if (ucl_object_type (elt) == UCL_STRING) { + map_data = g_string_sized_new (32); + + if (rspamd_map_add_static_string (cfg, elt, map_data)) { + bk->data.sd->data = map_data->str; + bk->data.sd->len = map_data->len; + g_string_free (map_data, FALSE); + } + else { + g_string_free (map_data, TRUE); + msg_err_config ("map has static backend with invalid `data` field"); + goto err; + } + } + else if (ucl_object_type (elt) == UCL_ARRAY) { + map_data = g_string_sized_new (32); + it = ucl_object_iterate_new (elt); + + while ((cur = ucl_object_iterate_safe (it, true))) { + if (!rspamd_map_add_static_string (cfg, cur, map_data)) { + g_string_free (map_data, TRUE); + msg_err_config ("map has static backend with invalid " + "`data` field"); + ucl_object_iterate_free (it); + goto err; + } + } + + ucl_object_iterate_free (it); + bk->data.sd->data = map_data->str; + bk->data.sd->len = map_data->len; + g_string_free (map_data, FALSE); + } + } + else if (bk->protocol != MAP_PROTO_FILE) { + all_local = FALSE; + } + } + + if (all_local) { + map->poll_timeout = (map->poll_timeout * + cfg->map_file_watch_multiplier); + } + + rspamd_map_calculate_hash (map); + msg_debug_map ("added map from ucl"); + + cfg->maps = g_list_prepend (cfg->maps, map); + + return map; + +err: + + if (map) { + PTR_ARRAY_FOREACH (map->backends, i, bk) { + MAP_RELEASE (bk, "rspamd_map_backend"); + } + } + + return NULL; +} + +rspamd_map_traverse_function +rspamd_map_get_traverse_function (struct rspamd_map *map) +{ + if (map) { + return map->traverse_function; + } + + return NULL; +} + +void +rspamd_map_traverse (struct rspamd_map *map, rspamd_map_traverse_cb cb, + gpointer cbdata, gboolean reset_hits) +{ 
+ if (*map->user_data && map->traverse_function) { + map->traverse_function (*map->user_data, cb, cbdata, reset_hits); + } +} diff --git a/src/libserver/maps/map.h b/src/libserver/maps/map.h new file mode 100644 index 000000000..ce49bacbb --- /dev/null +++ b/src/libserver/maps/map.h @@ -0,0 +1,138 @@ +#ifndef RSPAMD_MAP_H +#define RSPAMD_MAP_H + +#include "config.h" +#include "contrib/libev/ev.h" + +#include "ucl.h" +#include "mem_pool.h" +#include "radix.h" +#include "dns.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Maps API is designed to load lists data from different dynamic sources. + * It monitor files and HTTP locations for modifications and reload them if they are + * modified. + */ +struct map_cb_data; +struct rspamd_worker; + +/** + * Callback types + */ +typedef gchar *(*map_cb_t) (gchar *chunk, gint len, + struct map_cb_data *data, gboolean final); + +typedef void (*map_fin_cb_t) (struct map_cb_data *data, void **target); + +typedef void (*map_dtor_t) (struct map_cb_data *data); + +typedef gboolean (*rspamd_map_traverse_cb) (gconstpointer key, + gconstpointer value, gsize hits, gpointer ud); + +typedef void (*rspamd_map_traverse_function) (void *data, + rspamd_map_traverse_cb cb, + gpointer cbdata, gboolean reset_hits); + +/** + * Common map object + */ +struct rspamd_config; +struct rspamd_map; + +/** + * Callback data for async load + */ +struct map_cb_data { + struct rspamd_map *map; + gint state; + void *prev_data; + void *cur_data; +}; + +/** + * Returns TRUE if line looks like a map definition + * @param map_line + * @return + */ +gboolean rspamd_map_is_map (const gchar *map_line); + +/** + * Add map from line + */ +struct rspamd_map *rspamd_map_add (struct rspamd_config *cfg, + const gchar *map_line, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + map_dtor_t dtor, + void **user_data, + struct rspamd_worker *worker); + +/** + * Add map from ucl + */ +struct rspamd_map *rspamd_map_add_from_ucl 
(struct rspamd_config *cfg, + const ucl_object_t *obj, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + map_dtor_t dtor, + void **user_data, + struct rspamd_worker *worker); + +enum rspamd_map_watch_type { + RSPAMD_MAP_WATCH_MIN = 9, + RSPAMD_MAP_WATCH_PRIMARY_CONTROLLER, + RSPAMD_MAP_WATCH_SCANNER, + RSPAMD_MAP_WATCH_WORKER, + RSPAMD_MAP_WATCH_MAX +}; + +/** + * Start watching of maps by adding events to libevent event loop + */ +void rspamd_map_watch (struct rspamd_config *cfg, + struct ev_loop *event_loop, + struct rspamd_dns_resolver *resolver, + struct rspamd_worker *worker, + enum rspamd_map_watch_type how); + +/** + * Preloads maps where all backends are file + * @param cfg + */ +void rspamd_map_preload (struct rspamd_config *cfg); + +/** + * Remove all maps watched (remove events) + */ +void rspamd_map_remove_all (struct rspamd_config *cfg); + +/** + * Get traverse function for specific map + * @param map + * @return + */ +rspamd_map_traverse_function rspamd_map_get_traverse_function (struct rspamd_map *map); + +/** + * Perform map traverse + * @param map + * @param cb + * @param cbdata + * @param reset_hits + * @return + */ +void rspamd_map_traverse (struct rspamd_map *map, rspamd_map_traverse_cb cb, + gpointer cbdata, gboolean reset_hits); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/libserver/maps/map_helpers.c b/src/libserver/maps/map_helpers.c new file mode 100644 index 000000000..d179d44f5 --- /dev/null +++ b/src/libserver/maps/map_helpers.c @@ -0,0 +1,1397 @@ +/*- + * Copyright 2018 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "map_helpers.h" +#include "map_private.h" +#include "khash.h" +#include "radix.h" +#include "rspamd.h" +#include "cryptobox.h" +#include "contrib/fastutf8/fastutf8.h" + +#ifdef WITH_HYPERSCAN +#include "hs.h" +#endif +#ifndef WITH_PCRE2 +#include <pcre.h> +#else +#include <pcre2.h> +#endif + + +static const guint64 map_hash_seed = 0xdeadbabeULL; +static const gchar *hash_fill = "1"; + +struct rspamd_map_helper_value { + gsize hits; + gconstpointer key; + gchar value[]; /* Null terminated */ +}; + +KHASH_INIT (rspamd_map_hash, const gchar *, + struct rspamd_map_helper_value *, true, + rspamd_strcase_hash, rspamd_strcase_equal); + +struct rspamd_radix_map_helper { + rspamd_mempool_t *pool; + khash_t(rspamd_map_hash) *htb; + radix_compressed_t *trie; + rspamd_cryptobox_fast_hash_state_t hst; +}; + +struct rspamd_hash_map_helper { + rspamd_mempool_t *pool; + khash_t(rspamd_map_hash) *htb; + rspamd_cryptobox_fast_hash_state_t hst; +}; + +struct rspamd_regexp_map_helper { + rspamd_mempool_t *pool; + struct rspamd_map *map; + GPtrArray *regexps; + GPtrArray *values; + khash_t(rspamd_map_hash) *htb; + rspamd_cryptobox_fast_hash_state_t hst; + enum rspamd_regexp_map_flags map_flags; +#ifdef WITH_HYPERSCAN + hs_database_t *hs_db; + hs_scratch_t *hs_scratch; + gchar **patterns; + gint *flags; + gint *ids; +#endif +}; + +/** + * FSM for parsing lists + */ + +#define MAP_STORE_KEY do { \ + while (g_ascii_isspace (*c) && p > c) { c ++; } \ + key = g_malloc (p - c + 1); \ + rspamd_strlcpy (key, c, p - c + 1); \ + key = g_strstrip (key); \ +} 
while (0) + +#define MAP_STORE_VALUE do { \ + while (g_ascii_isspace (*c) && p > c) { c ++; } \ + value = g_malloc (p - c + 1); \ + rspamd_strlcpy (value, c, p - c + 1); \ + value = g_strstrip (value); \ +} while (0) + +gchar * +rspamd_parse_kv_list ( + gchar * chunk, + gint len, + struct map_cb_data *data, + insert_func func, + const gchar *default_value, + gboolean final) +{ + enum { + map_skip_spaces_before_key = 0, + map_read_key, + map_read_key_quoted, + map_read_key_slashed, + map_skip_spaces_after_key, + map_backslash_quoted, + map_backslash_slashed, + map_read_key_after_slash, + map_read_value, + map_read_comment_start, + map_skip_comment, + map_read_eol, + }; + + gchar *c, *p, *key = NULL, *value = NULL, *end; + struct rspamd_map *map = data->map; + guint line_number = 0; + + p = chunk; + c = p; + end = p + len; + + while (p < end) { + switch (data->state) { + case map_skip_spaces_before_key: + if (g_ascii_isspace (*p)) { + p ++; + } + else { + if (*p == '"') { + p++; + c = p; + data->state = map_read_key_quoted; + } + else if (*p == '/') { + /* Note that c is on '/' here as '/' is a part of key */ + c = p; + p++; + data->state = map_read_key_slashed; + } + else { + c = p; + data->state = map_read_key; + } + } + break; + case map_read_key: + /* read key */ + /* Check here comments, eol and end of buffer */ + if (*p == '#' && (p == c || *(p - 1) != '\\')) { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s; line: %d", + key, default_value, line_number); + g_free (key); + } + + key = NULL; + data->state = map_read_comment_start; + } + else if (*p == '\r' || *p == '\n') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s; line: %d", + key, default_value, line_number); + g_free (key); + } + + data->state = map_read_eol; + key = NULL; + } + else if 
(g_ascii_isspace (*p)) { + if (p - c > 0) { + MAP_STORE_KEY; + data->state = map_skip_spaces_after_key; + } + else { + msg_err_map ("empty or invalid key found on line %d", line_number); + data->state = map_skip_comment; + } + } + else { + p++; + } + break; + case map_read_key_quoted: + if (*p == '\\') { + data->state = map_backslash_quoted; + p ++; + } + else if (*p == '"') { + /* Allow empty keys in this case */ + if (p - c >= 0) { + MAP_STORE_KEY; + data->state = map_skip_spaces_after_key; + } + else { + g_assert_not_reached (); + } + p ++; + } + else { + p ++; + } + break; + case map_read_key_slashed: + if (*p == '\\') { + data->state = map_backslash_slashed; + p ++; + } + else if (*p == '/') { + /* Allow empty keys in this case */ + if (p - c >= 0) { + data->state = map_read_key_after_slash; + } + else { + g_assert_not_reached (); + } + } + else { + p ++; + } + break; + case map_read_key_after_slash: + /* + * This state is equal to reading of key but '/' is not + * treated specially + */ + if (*p == '#') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s; line: %d", + key, default_value, line_number); + g_free (key); + key = NULL; + } + + data->state = map_read_comment_start; + } + else if (*p == '\r' || *p == '\n') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + + msg_debug_map ("insert key only pair: %s -> %s; line: %d", + key, default_value, line_number); + g_free (key); + key = NULL; + } + + data->state = map_read_eol; + key = NULL; + } + else if (g_ascii_isspace (*p)) { + if (p - c > 0) { + MAP_STORE_KEY; + data->state = map_skip_spaces_after_key; + } + else { + msg_err_map ("empty or invalid key found on line %d", line_number); + data->state = map_skip_comment; + } + } + else { + p ++; + } + break; + case map_backslash_quoted: + p ++; + data->state = map_read_key_quoted; + break; + 
case map_backslash_slashed: + p ++; + data->state = map_read_key_slashed; + break; + case map_skip_spaces_after_key: + if (*p == ' ' || *p == '\t') { + p ++; + } + else { + c = p; + data->state = map_read_value; + } + break; + case map_read_value: + if (key == NULL) { + /* Ignore line */ + msg_err_map ("empty or invalid key found on line %d", line_number); + data->state = map_skip_comment; + } + else { + if (*p == '#') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_VALUE; + func (data->cur_data, key, value); + msg_debug_map ("insert key value pair: %s -> %s; line: %d", + key, value, line_number); + g_free (key); + g_free (value); + key = NULL; + value = NULL; + } else { + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s; line: %d", + key, default_value, line_number); + g_free (key); + key = NULL; + } + + data->state = map_read_comment_start; + } else if (*p == '\r' || *p == '\n') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_VALUE; + func (data->cur_data, key, value); + msg_debug_map ("insert key value pair: %s -> %s", + key, value); + g_free (key); + g_free (value); + key = NULL; + value = NULL; + } else { + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + key = NULL; + } + + data->state = map_read_eol; + key = NULL; + } + else { + p++; + } + } + break; + case map_read_comment_start: + if (*p == '#') { + data->state = map_skip_comment; + p ++; + key = NULL; + value = NULL; + } + else { + g_assert_not_reached (); + } + break; + case map_skip_comment: + if (*p == '\r' || *p == '\n') { + data->state = map_read_eol; + } + else { + p ++; + } + break; + case map_read_eol: + /* Skip \r\n and whitespaces */ + if (*p == '\r' || *p == '\n') { + if (*p == '\n') { + /* We don't care about \r only line separators, they are too rare */ + line_number ++; + } + p++; + } + else { + data->state = map_skip_spaces_before_key; + } 
+ break; + default: + g_assert_not_reached (); + break; + } + } + + if (final) { + /* Examine the state */ + switch (data->state) { + case map_read_key: + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + key = NULL; + } + break; + case map_read_value: + if (key == NULL) { + /* Ignore line */ + msg_err_map ("empty or invalid key found on line %d", line_number); + data->state = map_skip_comment; + } + else { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_VALUE; + func (data->cur_data, key, value); + msg_debug_map ("insert key value pair: %s -> %s", + key, value); + g_free (key); + g_free (value); + key = NULL; + value = NULL; + } else { + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + key = NULL; + } + } + break; + } + + data->state = map_skip_spaces_before_key; + } + + return c; +} + +/** + * Radix tree helper function + */ +void +rspamd_map_helper_insert_radix (gpointer st, gconstpointer key, gconstpointer value) +{ + struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st; + struct rspamd_map_helper_value *val; + gsize vlen; + khiter_t k; + gconstpointer nk; + gint res; + + vlen = strlen (value); + val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) + + vlen + 1); + memcpy (val->value, value, vlen); + + k = kh_get (rspamd_map_hash, r->htb, key); + + if (k == kh_end (r->htb)) { + nk = rspamd_mempool_strdup (r->pool, key); + k = kh_put (rspamd_map_hash, r->htb, nk, &res); + } + + nk = kh_key (r->htb, k); + val->key = nk; + kh_value (r->htb, k) = val; + rspamd_radix_add_iplist (key, ",", r->trie, val, FALSE); + rspamd_cryptobox_fast_hash_update (&r->hst, nk, strlen (nk)); +} + +void +rspamd_map_helper_insert_radix_resolve (gpointer st, gconstpointer key, gconstpointer value) +{ + struct 
rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st; + struct rspamd_map_helper_value *val; + gsize vlen; + khiter_t k; + gconstpointer nk; + gint res; + + vlen = strlen (value); + val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) + + vlen + 1); + memcpy (val->value, value, vlen); + + k = kh_get (rspamd_map_hash, r->htb, key); + + if (k == kh_end (r->htb)) { + nk = rspamd_mempool_strdup (r->pool, key); + k = kh_put (rspamd_map_hash, r->htb, nk, &res); + } + + nk = kh_key (r->htb, k); + val->key = nk; + kh_value (r->htb, k) = val; + rspamd_radix_add_iplist (key, ",", r->trie, val, TRUE); + rspamd_cryptobox_fast_hash_update (&r->hst, nk, strlen (nk)); +} + +void +rspamd_map_helper_insert_hash (gpointer st, gconstpointer key, gconstpointer value) +{ + struct rspamd_hash_map_helper *ht = st; + struct rspamd_map_helper_value *val; + khiter_t k; + gconstpointer nk; + gsize vlen; + gint r; + + k = kh_get (rspamd_map_hash, ht->htb, key); + vlen = strlen (value); + + if (k == kh_end (ht->htb)) { + nk = rspamd_mempool_strdup (ht->pool, key); + k = kh_put (rspamd_map_hash, ht->htb, nk, &r); + } + else { + val = kh_value (ht->htb, k); + + if (strcmp (value, val->value) == 0) { + /* Same element, skip */ + return; + } + } + + /* Null termination due to alloc0 */ + val = rspamd_mempool_alloc0 (ht->pool, sizeof (*val) + vlen + 1); + memcpy (val->value, value, vlen); + + nk = kh_key (ht->htb, k); + val->key = nk; + kh_value (ht->htb, k) = val; + rspamd_cryptobox_fast_hash_update (&ht->hst, nk, strlen (nk)); +} + +void +rspamd_map_helper_insert_re (gpointer st, gconstpointer key, gconstpointer value) +{ + struct rspamd_regexp_map_helper *re_map = st; + struct rspamd_map *map; + rspamd_regexp_t *re; + gchar *escaped; + GError *err = NULL; + gint pcre_flags; + gsize escaped_len; + struct rspamd_map_helper_value *val; + khiter_t k; + gconstpointer nk; + gsize vlen; + gint r; + + map = re_map->map; + + if (re_map->map_flags & RSPAMD_REGEXP_MAP_FLAG_GLOB) { + escaped = 
rspamd_str_regexp_escape (key, strlen (key), &escaped_len, + RSPAMD_REGEXP_ESCAPE_GLOB|RSPAMD_REGEXP_ESCAPE_UTF); + re = rspamd_regexp_new (escaped, NULL, &err); + g_free (escaped); + } + else { + re = rspamd_regexp_new (key, NULL, &err); + } + + if (re == NULL) { + msg_err_map ("cannot parse regexp %s: %e", key, err); + + if (err) { + g_error_free (err); + } + + return; + } + + vlen = strlen (value); + val = rspamd_mempool_alloc0 (re_map->pool, sizeof (*val) + + vlen + 1); + memcpy (val->value, value, vlen); + + k = kh_get (rspamd_map_hash, re_map->htb, key); + + if (k == kh_end (re_map->htb)) { + nk = rspamd_mempool_strdup (re_map->pool, key); + k = kh_put (rspamd_map_hash, re_map->htb, nk, &r); + } + + nk = kh_key (re_map->htb, k); + val->key = nk; + kh_value (re_map->htb, k) = val; + rspamd_cryptobox_fast_hash_update (&re_map->hst, nk, strlen (nk)); + + pcre_flags = rspamd_regexp_get_pcre_flags (re); + +#ifndef WITH_PCRE2 + if (pcre_flags & PCRE_FLAG(UTF8)) { + re_map->map_flags |= RSPAMD_REGEXP_MAP_FLAG_UTF; + } +#else + if (pcre_flags & PCRE_FLAG(UTF)) { + re_map->map_flags |= RSPAMD_REGEXP_MAP_FLAG_UTF; + } +#endif + + g_ptr_array_add (re_map->regexps, re); + g_ptr_array_add (re_map->values, val); +} + +static void +rspamd_map_helper_traverse_regexp (void *data, + rspamd_map_traverse_cb cb, + gpointer cbdata, + gboolean reset_hits) +{ + gconstpointer k; + struct rspamd_map_helper_value *val; + struct rspamd_regexp_map_helper *re_map = data; + + kh_foreach (re_map->htb, k, val, { + if (!cb (k, val->value, val->hits, cbdata)) { + break; + } + + if (reset_hits) { + val->hits = 0; + } + }); +} + +struct rspamd_hash_map_helper * +rspamd_map_helper_new_hash (struct rspamd_map *map) +{ + struct rspamd_hash_map_helper *htb; + rspamd_mempool_t *pool; + + if (map) { + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + map->tag, 0); + } + else { + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + NULL, 0); + } + + htb = rspamd_mempool_alloc0 (pool, 
sizeof (*htb)); + htb->htb = kh_init (rspamd_map_hash); + htb->pool = pool; + rspamd_cryptobox_fast_hash_init (&htb->hst, map_hash_seed); + + return htb; +} + +void +rspamd_map_helper_destroy_hash (struct rspamd_hash_map_helper *r) +{ + if (r == NULL || r->pool == NULL) { + return; + } + + rspamd_mempool_t *pool = r->pool; + kh_destroy (rspamd_map_hash, r->htb); + memset (r, 0, sizeof (*r)); + rspamd_mempool_delete (pool); +} + +static void +rspamd_map_helper_traverse_hash (void *data, + rspamd_map_traverse_cb cb, + gpointer cbdata, + gboolean reset_hits) +{ + gconstpointer k; + struct rspamd_map_helper_value *val; + struct rspamd_hash_map_helper *ht = data; + + kh_foreach (ht->htb, k, val, { + if (!cb (k, val->value, val->hits, cbdata)) { + break; + } + + if (reset_hits) { + val->hits = 0; + } + }); +} + +struct rspamd_radix_map_helper * +rspamd_map_helper_new_radix (struct rspamd_map *map) +{ + struct rspamd_radix_map_helper *r; + rspamd_mempool_t *pool; + + if (map) { + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + map->tag, 0); + } + else { + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + NULL, 0); + } + + r = rspamd_mempool_alloc0 (pool, sizeof (*r)); + r->trie = radix_create_compressed_with_pool (pool); + r->htb = kh_init (rspamd_map_hash); + r->pool = pool; + rspamd_cryptobox_fast_hash_init (&r->hst, map_hash_seed); + + return r; +} + +void +rspamd_map_helper_destroy_radix (struct rspamd_radix_map_helper *r) +{ + if (r == NULL || !r->pool) { + return; + } + + kh_destroy (rspamd_map_hash, r->htb); + rspamd_mempool_t *pool = r->pool; + memset (r, 0, sizeof (*r)); + rspamd_mempool_delete (pool); +} + +static void +rspamd_map_helper_traverse_radix (void *data, + rspamd_map_traverse_cb cb, + gpointer cbdata, + gboolean reset_hits) +{ + gconstpointer k; + struct rspamd_map_helper_value *val; + struct rspamd_radix_map_helper *r = data; + + kh_foreach (r->htb, k, val, { + if (!cb (k, val->value, val->hits, cbdata)) { + break; + } + + if 
(reset_hits) { + val->hits = 0; + } + }); +} + +struct rspamd_regexp_map_helper * +rspamd_map_helper_new_regexp (struct rspamd_map *map, + enum rspamd_regexp_map_flags flags) +{ + struct rspamd_regexp_map_helper *re_map; + rspamd_mempool_t *pool; + + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + map->tag, 0); + + re_map = rspamd_mempool_alloc0 (pool, sizeof (*re_map)); + re_map->pool = pool; + re_map->values = g_ptr_array_new (); + re_map->regexps = g_ptr_array_new (); + re_map->map = map; + re_map->map_flags = flags; + re_map->htb = kh_init (rspamd_map_hash); + rspamd_cryptobox_fast_hash_init (&re_map->hst, map_hash_seed); + + return re_map; +} + + +void +rspamd_map_helper_destroy_regexp (struct rspamd_regexp_map_helper *re_map) +{ + rspamd_regexp_t *re; + guint i; + + if (!re_map || !re_map->regexps) { + return; + } + +#ifdef WITH_HYPERSCAN + if (re_map->hs_scratch) { + hs_free_scratch (re_map->hs_scratch); + } + if (re_map->hs_db) { + hs_free_database (re_map->hs_db); + } + if (re_map->patterns) { + for (i = 0; i < re_map->regexps->len; i ++) { + g_free (re_map->patterns[i]); + } + + g_free (re_map->patterns); + } + if (re_map->flags) { + g_free (re_map->flags); + } + if (re_map->ids) { + g_free (re_map->ids); + } +#endif + + for (i = 0; i < re_map->regexps->len; i ++) { + re = g_ptr_array_index (re_map->regexps, i); + rspamd_regexp_unref (re); + } + + g_ptr_array_free (re_map->regexps, TRUE); + g_ptr_array_free (re_map->values, TRUE); + kh_destroy (rspamd_map_hash, re_map->htb); + + rspamd_mempool_t *pool = re_map->pool; + memset (re_map, 0, sizeof (*re_map)); + rspamd_mempool_delete (pool); +} + +gchar * +rspamd_kv_list_read ( + gchar * chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + if (data->cur_data == NULL) { + data->cur_data = rspamd_map_helper_new_hash (data->map); + } + + return rspamd_parse_kv_list ( + chunk, + len, + data, + rspamd_map_helper_insert_hash, + "", + final); +} + +void +rspamd_kv_list_fin (struct 
map_cb_data *data, void **target) +{ + struct rspamd_map *map = data->map; + struct rspamd_hash_map_helper *htb; + + if (data->cur_data) { + htb = (struct rspamd_hash_map_helper *)data->cur_data; + msg_info_map ("read hash of %d elements", kh_size (htb->htb)); + data->map->traverse_function = rspamd_map_helper_traverse_hash; + data->map->nelts = kh_size (htb->htb); + data->map->digest = rspamd_cryptobox_fast_hash_final (&htb->hst); + } + + if (target) { + *target = data->cur_data; + } + + if (data->prev_data) { + htb = (struct rspamd_hash_map_helper *)data->prev_data; + rspamd_map_helper_destroy_hash (htb); + } +} + +void +rspamd_kv_list_dtor (struct map_cb_data *data) +{ + struct rspamd_hash_map_helper *htb; + + if (data->cur_data) { + htb = (struct rspamd_hash_map_helper *)data->cur_data; + rspamd_map_helper_destroy_hash (htb); + } +} + +gchar * +rspamd_radix_read ( + gchar * chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + struct rspamd_radix_map_helper *r; + struct rspamd_map *map = data->map; + + if (data->cur_data == NULL) { + r = rspamd_map_helper_new_radix (map); + data->cur_data = r; + } + + return rspamd_parse_kv_list ( + chunk, + len, + data, + rspamd_map_helper_insert_radix, + hash_fill, + final); +} + +void +rspamd_radix_fin (struct map_cb_data *data, void **target) +{ + struct rspamd_map *map = data->map; + struct rspamd_radix_map_helper *r; + + if (data->cur_data) { + r = (struct rspamd_radix_map_helper *)data->cur_data; + msg_info_map ("read radix trie of %z elements: %s", + radix_get_size (r->trie), radix_get_info (r->trie)); + data->map->traverse_function = rspamd_map_helper_traverse_radix; + data->map->nelts = kh_size (r->htb); + data->map->digest = rspamd_cryptobox_fast_hash_final (&r->hst); + } + + if (target) { + *target = data->cur_data; + } + + if (data->prev_data) { + r = (struct rspamd_radix_map_helper *)data->prev_data; + rspamd_map_helper_destroy_radix (r); + } +} + +void +rspamd_radix_dtor (struct map_cb_data *data) 
+{
+	struct rspamd_radix_map_helper *r;
+
+	if (data->cur_data) {
+		r = (struct rspamd_radix_map_helper *)data->cur_data;
+		rspamd_map_helper_destroy_radix (r);
+	}
+}
+
+/**
+ * Compiles every regexp stored in a regexp map into one hyperscan database
+ * and allocates the scratch space needed to scan with it.
+ *
+ * Does nothing when built without WITH_HYPERSCAN. On any failure the map is
+ * left without a hyperscan database (`hs_db` is NULL), which makes the
+ * matching functions use the per-regexp search instead.
+ * @param re_map regexp map helper whose `regexps` array is already filled
+ */
+static void
+rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
+{
+#ifdef WITH_HYPERSCAN
+	guint i;
+	hs_platform_info_t plt;
+	hs_compile_error_t *err;
+	struct rspamd_map *map;
+	rspamd_regexp_t *re;
+	gint pcre_flags;
+
+	map = re_map->map;
+
+	if (!(map->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
+		msg_info_map ("disable hyperscan for map %s, ssse3 instructons are not supported by CPU",
+				map->name);
+		return;
+	}
+
+	if (hs_populate_platform (&plt) != HS_SUCCESS) {
+		msg_err_map ("cannot populate hyperscan platform");
+		return;
+	}
+
+	re_map->patterns = g_new (gchar *, re_map->regexps->len);
+	re_map->flags = g_new (gint, re_map->regexps->len);
+	re_map->ids = g_new (gint, re_map->regexps->len);
+
+	for (i = 0; i < re_map->regexps->len; i ++) {
+		const gchar *pat;
+		gchar *escaped;
+		gint pat_flags;
+
+		re = g_ptr_array_index (re_map->regexps, i);
+		pcre_flags = rspamd_regexp_get_pcre_flags (re);
+		pat = rspamd_regexp_get_pattern (re);
+		pat_flags = rspamd_regexp_get_flags (re);
+
+		/*
+		 * g_new does not zero the array, so the slot must be assigned before
+		 * any flag is or-ed into it; otherwise the UTF8 bit set below would
+		 * be applied to garbage and then thrown away.
+		 */
+		re_map->flags[i] = HS_FLAG_SINGLEMATCH;
+
+		if (pat_flags & RSPAMD_REGEXP_FLAG_UTF) {
+			escaped = rspamd_str_regexp_escape (pat, strlen (pat), NULL,
+					RSPAMD_REGEXP_ESCAPE_RE|RSPAMD_REGEXP_ESCAPE_UTF);
+			re_map->flags[i] |= HS_FLAG_UTF8;
+		}
+		else {
+			escaped = rspamd_str_regexp_escape (pat, strlen (pat), NULL,
+					RSPAMD_REGEXP_ESCAPE_RE);
+		}
+
+		re_map->patterns[i] = escaped;
+
+		/* Translate the PCRE compile flags into their hyperscan equivalents */
+#ifndef WITH_PCRE2
+		if (pcre_flags & PCRE_FLAG(UTF8)) {
+			re_map->flags[i] |= HS_FLAG_UTF8;
+		}
+#else
+		if (pcre_flags & PCRE_FLAG(UTF)) {
+			re_map->flags[i] |= HS_FLAG_UTF8;
+		}
+#endif
+		if (pcre_flags & PCRE_FLAG(CASELESS)) {
+			re_map->flags[i] |= HS_FLAG_CASELESS;
+		}
+		if (pcre_flags & PCRE_FLAG(MULTILINE)) {
+			re_map->flags[i] |= HS_FLAG_MULTILINE;
+		}
+		if (pcre_flags & PCRE_FLAG(DOTALL)) {
+			re_map->flags[i] |= HS_FLAG_DOTALL;
+		}
+		if (rspamd_regexp_get_maxhits (re) == 1) {
+			re_map->flags[i] |= HS_FLAG_SINGLEMATCH;
+		}
+
+		/* The hyperscan id is the position in `regexps`, see the handlers */
+		re_map->ids[i] = i;
+	}
+
+	if (re_map->regexps->len > 0 && re_map->patterns) {
+		if (hs_compile_multi ((const gchar **)re_map->patterns,
+				re_map->flags,
+				re_map->ids,
+				re_map->regexps->len,
+				HS_MODE_BLOCK,
+				&plt,
+				&re_map->hs_db,
+				&err) != HS_SUCCESS) {
+
+			msg_err_map ("cannot create tree of regexp when processing '%s': %s",
+					err->expression >= 0 ?
+							re_map->patterns[err->expression] :
+							"unknown regexp", err->message);
+			re_map->hs_db = NULL;
+			hs_free_compile_error (err);
+
+			return;
+		}
+
+		if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) {
+			msg_err_map ("cannot allocate scratch space for hyperscan");
+			hs_free_database (re_map->hs_db);
+			re_map->hs_db = NULL;
+		}
+	}
+	else {
+		msg_err_map ("regexp map is empty");
+	}
+#endif
+}
+
+/*
+ * Shared body of the four regexp/glob list readers: creates the regexp map
+ * helper with the requested flags when the first chunk arrives and passes the
+ * chunk to the generic key-value list parser.
+ */
+static gchar *
+rspamd_regexp_list_read_common (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final,
+		enum rspamd_regexp_map_flags flags)
+{
+	if (data->cur_data == NULL) {
+		data->cur_data = rspamd_map_helper_new_regexp (data->map, flags);
+	}
+
+	return rspamd_parse_kv_list (
+			chunk,
+			len,
+			data,
+			rspamd_map_helper_insert_re,
+			hash_fill,
+			final);
+}
+
+/**
+ * Map read callback: regular expressions, map created without flags
+ */
+gchar *
+rspamd_regexp_list_read_single (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final)
+{
+	return rspamd_regexp_list_read_common (chunk, len, data, final, 0);
+}
+
+/**
+ * Map read callback: glob patterns (RSPAMD_REGEXP_MAP_FLAG_GLOB)
+ */
+gchar *
+rspamd_glob_list_read_single (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final)
+{
+	return rspamd_regexp_list_read_common (chunk, len, data, final,
+			RSPAMD_REGEXP_MAP_FLAG_GLOB);
+}
+
+/**
+ * Map read callback: regular expressions (RSPAMD_REGEXP_MAP_FLAG_MULTIPLE)
+ */
+gchar *
+rspamd_regexp_list_read_multiple (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final)
+{
+	return rspamd_regexp_list_read_common (chunk, len, data, final,
+			RSPAMD_REGEXP_MAP_FLAG_MULTIPLE);
+}
+
+/**
+ * Map read callback: glob patterns with both the GLOB and MULTIPLE flags
+ */
+gchar *
+rspamd_glob_list_read_multiple (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final)
+{
+	return rspamd_regexp_list_read_common (chunk, len, data, final,
+			RSPAMD_REGEXP_MAP_FLAG_GLOB|RSPAMD_REGEXP_MAP_FLAG_MULTIPLE);
+}
+
+
+/**
+ * Map finish callback for regexp lists: finalizes the freshly read helper,
+ * publishes its traverse function, element count and digest on the map,
+ * stores it into `target` and destroys the previous helper.
+ * @param data callback data holding the current and the previous helper
+ * @param target if not NULL, receives `data->cur_data`
+ */
+void
+rspamd_regexp_list_fin (struct map_cb_data *data, void **target)
+{
+	struct rspamd_regexp_map_helper *re_map;
+	struct rspamd_map *map = data->map;
+
+	if (data->cur_data) {
+		re_map = data->cur_data;
+		rspamd_re_map_finalize (re_map);
+		msg_info_map ("read regexp list of %ud elements",
+				re_map->regexps->len);
+		data->map->traverse_function = rspamd_map_helper_traverse_regexp;
+		data->map->nelts = kh_size (re_map->htb);
+		data->map->digest = rspamd_cryptobox_fast_hash_final (&re_map->hst);
+	}
+
+	if (target) {
+		*target = data->cur_data;
+	}
+
+	if (data->prev_data) {
+		rspamd_map_helper_destroy_regexp (data->prev_data);
+	}
+}
+
+/**
+ * Map destructor callback for regexp lists: destroys the current helper
+ */
+void
+rspamd_regexp_list_dtor (struct map_cb_data *data)
+{
+	if (data->cur_data) {
+		rspamd_map_helper_destroy_regexp (data->cur_data);
+	}
+}
+
+#ifdef WITH_HYPERSCAN
+/*
+ * Hyperscan match callback for single-match lookups: stores the id of the
+ * matched expression into the guint pointed to by `context`.
+ */
+static int
+rspamd_match_hs_single_handler (unsigned int id, unsigned long long from,
+		unsigned long long to,
+		unsigned int flags, void *context)
+{
+	guint *i = context;
+	/* Always return non-zero as we need a single match here */
+
+	*i = id;
+
+	return 1;
+}
+#endif
+
+/**
+ * Finds a single (any) regexp of the map that matches the input.
+ * @param map regexp map helper; NULL (or a helper without regexps) yields NULL
+ * @param in input text, must not be NULL
+ * @param len length of `in`; zero yields NULL
+ * @return value stored for the matching regexp or NULL if nothing matched
+ */
+gconstpointer
+rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
+		const gchar *in, gsize len)
+{
+	guint i = 0;
+	rspamd_regexp_t *re;
+	gint res = 0;
+	gpointer ret = NULL;
+	struct rspamd_map_helper_value *val;
+	gboolean validated = FALSE;
+
+	g_assert (in != NULL);
+
+	if (map == NULL || len == 0 || map->regexps == NULL) {
+		return NULL;
+	}
+
+	if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
+		if (rspamd_fast_utf8_validate (in, len) == 0) {
+			validated = TRUE;
+		}
+	}
+	else {
+		validated = TRUE;
+	}
+
+#ifdef WITH_HYPERSCAN
+	if (map->hs_db && map->hs_scratch) {
+
+		if (validated) {
+
+			res = hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
+					rspamd_match_hs_single_handler, (void *)&i);
+
+			/*
+			 * The handler terminates the scan on the first hit; check the
+			 * reported id against the values array as the multiple-match
+			 * handler does before using it as an index.
+			 */
+			if (res == HS_SCAN_TERMINATED && i < map->values->len) {
+				val = g_ptr_array_index (map->values, i);
+
+				ret = val->value;
+				val->hits ++;
+			}
+
+			return ret;
+		}
+	}
+#endif
+
+	if (!res) {
+		/* PCRE version */
+		for (i = 0; i < map->regexps->len; i ++) {
+			re = g_ptr_array_index (map->regexps, i);
+
+			if (rspamd_regexp_search (re, in, len, NULL, NULL, !validated, NULL)) {
+				val = g_ptr_array_index (map->values, i);
+
+				ret = val->value;
+				val->hits ++;
+				break;
+			}
+		}
+	}
+
+	return ret;
+}
+
+#ifdef WITH_HYPERSCAN
+/* Context handed to the hyperscan callback when collecting all matches */
+struct rspamd_multiple_cbdata {
+	GPtrArray *ar;
+	struct rspamd_regexp_map_helper *map;
+};
+
+/*
+ * Hyperscan match callback for multiple-match lookups: appends the value of
+ * every reported expression to the result array and counts the hit.
+ */
+static int
+rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from,
+		unsigned long long to,
+		unsigned int flags, void *context)
+{
+	struct rspamd_multiple_cbdata *cbd = context;
+	struct rspamd_map_helper_value *val;
+
+
+	if (id < cbd->map->values->len) {
+		val = g_ptr_array_index (cbd->map->values, id);
+		val->hits ++;
+		g_ptr_array_add (cbd->ar, val->value);
+	}
+
+	/* Always return zero as we need all matches here */
+	return 0;
+}
+#endif
+
+/**
+ * Finds all regexps of the map that match the input.
+ * @param map regexp map helper; NULL (or a helper without regexps) yields NULL
+ * @param in input text, must not be NULL when `map` and `len` are usable
+ * @param len length of `in`; zero yields NULL
+ * @return array of the values of all matching regexps, owned by the caller,
+ * or NULL if nothing matched
+ */
+GPtrArray*
+rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
+		const gchar *in, gsize len)
+{
+	guint i;
+	rspamd_regexp_t *re;
+	GPtrArray *ret;
+	gint res = 0;
+	gboolean validated = FALSE;
+	struct rspamd_map_helper_value *val;
+
+	if (map == NULL || map->regexps == NULL || len == 0) {
+		return NULL;
+	}
+
+	g_assert (in != NULL);
+
+	if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
+		if (rspamd_fast_utf8_validate (in, len) == 0) {
+			validated = TRUE;
+		}
+	}
+	else {
+		validated = TRUE;
+	}
+
+	ret = g_ptr_array_new ();
+
+#ifdef WITH_HYPERSCAN
+	if (map->hs_db && map->hs_scratch) {
+
+		if (validated) {
+			struct rspamd_multiple_cbdata cbd;
+
+			cbd.ar = ret;
+			cbd.map = map;
+
+			if (hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
+					rspamd_match_hs_multiple_handler, &cbd) == HS_SUCCESS) {
+				res = 1;
+			}
+			else {
+				/*
+				 * The scan failed after it may have reported some matches;
+				 * drop them so that the PCRE pass below does not add the
+				 * same values a second time.
+				 */
+				g_ptr_array_set_size (ret, 0);
+			}
+		}
+	}
+#endif
+
+	if (!res) {
+		/* PCRE version */
+		for (i = 0; i < map->regexps->len; i ++) {
+			re = g_ptr_array_index (map->regexps, i);
+
+			if (rspamd_regexp_search (re, in, len, NULL, NULL,
+					!validated, NULL)) {
+				val = g_ptr_array_index (map->values, i);
+				val->hits ++;
+				g_ptr_array_add (ret, val->value);
+			}
+		}
+	}
+
+	if (ret->len > 0) {
+		return ret;
+	}
+
+	g_ptr_array_free (ret, TRUE);
+
+	return NULL;
+}
+
+/**
+ * Looks a key up in a hash map and counts the hit.
+ * @param map hash map helper; NULL (or a helper without a table) yields NULL
+ * @param in key to look up
+ * @return value stored for the key or NULL if it is absent
+ */
+gconstpointer
+rspamd_match_hash_map (struct rspamd_hash_map_helper *map, const gchar *in)
+{
+	khiter_t k;
+	struct rspamd_map_helper_value *val;
+
+	if (map == NULL || map->htb == NULL) {
+		return NULL;
+	}
+
+	k = kh_get (rspamd_map_hash, map->htb, in);
+
+	if (k != kh_end (map->htb)) {
+		val = kh_value (map->htb, k);
+		val->hits ++;
+
+		return val->value;
+	}
+
+	return NULL;
+}
+
+/**
+ * Looks a raw address up in a radix map and counts the hit.
+ * @param map radix map helper; NULL (or a helper without a trie) yields NULL
+ * @param in raw address bytes
+ * @param inlen number of bytes in `in`
+ * @return value stored for the address or NULL if it is not covered
+ */
+gconstpointer
+rspamd_match_radix_map (struct rspamd_radix_map_helper *map,
+		const guchar *in, gsize inlen)
+{
+	struct rspamd_map_helper_value *val;
+
+	if (map == NULL || map->trie == NULL) {
+		return NULL;
+	}
+
+	val = (struct rspamd_map_helper_value *)radix_find_compressed (map->trie,
+			in, inlen);
+
+	if (val != (gconstpointer)RADIX_NO_VALUE) {
+		val->hits ++;
+
+		return val->value;
+	}
+
+	return NULL;
+}
+
+/**
+ * Same lookup as rspamd_match_radix_map, keyed by an rspamd_inet_addr_t.
+ * @param map radix map helper; NULL (or a helper without a trie) yields NULL
+ * @param addr address to look up
+ * @return value stored for the address or NULL if it is not covered
+ */
+gconstpointer
+rspamd_match_radix_map_addr (struct rspamd_radix_map_helper *map,
+		const rspamd_inet_addr_t *addr)
+{
+	struct rspamd_map_helper_value *val;
+
+	if (map == NULL || map->trie == NULL) {
+		return NULL;
+	}
+
+	val = (struct rspamd_map_helper_value *)radix_find_compressed_addr (map->trie, addr);
+
+	if (val != (gconstpointer)RADIX_NO_VALUE) {
+		val->hits ++;
+
+		return val->value;
+	}
+
+	return NULL;
+}
\ No newline at end of file
diff --git a/src/libserver/maps/map_helpers.h b/src/libserver/maps/map_helpers.h
new file mode 100644
index 000000000..4f7b5b804
--- /dev/null
+++ b/src/libserver/maps/map_helpers.h
@@ -0,0 +1,246 @@
+/*-
+ * Copyright 2018 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_MAP_HELPERS_H
+#define RSPAMD_MAP_HELPERS_H
+
+#include "config.h"
+#include "map.h"
+#include "addr.h"
+
+/**
+ * @file map_helpers.h
+ *
+ * Defines helper structures to deal with different map types
+ */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Common structures, abstract for simplicity
+ */
+struct rspamd_radix_map_helper;
+struct rspamd_hash_map_helper;
+struct rspamd_regexp_map_helper;
+struct rspamd_map_helper_value;
+
+enum rspamd_regexp_map_flags {
+	RSPAMD_REGEXP_MAP_FLAG_UTF = (1u << 0),
+	RSPAMD_REGEXP_MAP_FLAG_MULTIPLE = (1u << 1),
+	RSPAMD_REGEXP_MAP_FLAG_GLOB = (1u << 2),
+};
+
+typedef void (*insert_func) (gpointer st, gconstpointer key,
+		gconstpointer value);
+
+/**
+ * Radix list is a list like ip/mask
+ */
+gchar *rspamd_radix_read (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+
+void rspamd_radix_fin (struct map_cb_data *data, void **target);
+
+void rspamd_radix_dtor (struct map_cb_data *data);
+
+/**
+ * Kv list is an ordinary list of keys and values separated by whitespace
+ */
+gchar *rspamd_kv_list_read (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+
+void rspamd_kv_list_fin (struct map_cb_data *data, void **target);
+
+void rspamd_kv_list_dtor (struct map_cb_data *data);
+
+/**
+ * Regexp list is a list of regular expressions
+ */
+
+gchar *rspamd_regexp_list_read_single (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+
+gchar *rspamd_regexp_list_read_multiple (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+
+gchar *rspamd_glob_list_read_single (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+
+gchar *rspamd_glob_list_read_multiple (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+
+void rspamd_regexp_list_fin (struct map_cb_data *data, void **target);
+
+void rspamd_regexp_list_dtor (struct map_cb_data *data);
+
+/**
+ * FSM for lists parsing (support comments, blank lines and partial replies)
+ */
+gchar *
+rspamd_parse_kv_list (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		insert_func func,
+		const gchar *default_value,
+		gboolean final);
+
+/**
+ * Find a single (any) matching regexp for the specified text or NULL if
+ * no matches found
+ * @param map regexp map helper (NULL yields NULL)
+ * @param in text to match, must not be NULL
+ * @param len length of the text (zero yields NULL)
+ * @return value stored for the matching regexp or NULL
+ */
+gconstpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
+		const gchar *in, gsize len);
+
+/**
+ * Find all matching regexps for the specified text or NULL if
+ * no matches found. Returns GPtrArray that *must* be freed by a caller if not NULL
+ * @param map regexp map helper (NULL yields NULL)
+ * @param in text to match
+ * @param len length of the text (zero yields NULL)
+ * @return array of the values stored for the matching regexps or NULL
+ */
+GPtrArray *rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
+		const gchar *in, gsize len);
+
+/**
+ * Find value matching specific key in a hash map
+ * @param map hash map helper (NULL yields NULL)
+ * @param in key to look up
+ * (this function takes no length argument)
+ * @return value stored for the key or NULL
+ */
+gconstpointer rspamd_match_hash_map (struct rspamd_hash_map_helper *map,
+		const gchar *in);
+
+/**
+ * Find value matching specific address in a radix map
+ * @param map radix map helper (NULL yields NULL)
+ * @param in raw ip address
+ * @param inlen ip address length (4 for IPv4 and 16 for IPv6)
+ * @return value stored for the address or NULL
+ */
+gconstpointer rspamd_match_radix_map (struct rspamd_radix_map_helper *map,
+		const guchar *in, gsize inlen);
+
+gconstpointer rspamd_match_radix_map_addr (struct rspamd_radix_map_helper *map,
+		const rspamd_inet_addr_t *addr); /* same lookup keyed by an rspamd_inet_addr_t */
+
+/**
+ * Creates radix map helper
+ * @param map
+ * @return
+ */
+struct rspamd_radix_map_helper *rspamd_map_helper_new_radix (struct rspamd_map *map);
+
+/**
+ * Inserts new value into radix map
+ * @param st
+ * @param key
+ * @param value
+ */
+void rspamd_map_helper_insert_radix (gpointer st, gconstpointer key, gconstpointer value);
+
+/**
+ * Inserts new value into radix map performing synchronous resolving
+ * @param st
+ * @param key
+ * @param value
+ */
+void rspamd_map_helper_insert_radix_resolve (gpointer st, gconstpointer key,
+		gconstpointer value);
+
+/**
+ * Destroys radix map helper
+ * @param r
+ */
+void rspamd_map_helper_destroy_radix (struct rspamd_radix_map_helper *r);
+
+
+/**
+ * Creates hash map helper
+ * @param map
+ * @return
+ */
+struct rspamd_hash_map_helper *rspamd_map_helper_new_hash (struct rspamd_map *map);
+
+/**
+ * Inserts a new value into a hash map
+ * @param st
+ * @param key
+ * @param value
+ */
+void rspamd_map_helper_insert_hash (gpointer st, gconstpointer key, gconstpointer value);
+
+/**
+ * Destroys hash map helper
+ * @param r
+ */
+void rspamd_map_helper_destroy_hash (struct rspamd_hash_map_helper *r);
+
+/**
+ * Create new regexp map
+ * @param map
+ * @param flags
+ * @return
+ */
+struct rspamd_regexp_map_helper *rspamd_map_helper_new_regexp (struct rspamd_map *map,
+		enum rspamd_regexp_map_flags flags);
+
+/**
+ * Inserts a new regexp into regexp map
+ * @param st
+ * @param key
+ * @param value
+ */
+void rspamd_map_helper_insert_re (gpointer st, gconstpointer key, gconstpointer value);
+
+/**
+ * Destroy regexp map
+ * @param re_map
+ */
+void rspamd_map_helper_destroy_regexp (struct rspamd_regexp_map_helper *re_map);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/maps/map_private.h b/src/libserver/maps/map_private.h
new file mode 100644
index 000000000..347f63538
--- /dev/null
+++ b/src/libserver/maps/map_private.h
@@ -0,0 +1,219 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBUTIL_MAP_PRIVATE_H_
+#define SRC_LIBUTIL_MAP_PRIVATE_H_
+
+#include "config.h"
+#include "mem_pool.h"
+#include "keypair.h"
+#include "unix-std.h"
+#include "map.h"
+#include "ref.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*rspamd_map_tmp_dtor) (gpointer p);
+
+extern guint rspamd_map_log_id; /* the msg_*_map macros below expand to map->tag: a variable named `map` must be in scope */
+#define msg_err_map(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
+		"map", map->tag, \
+		G_STRFUNC, \
+		__VA_ARGS__)
+#define msg_warn_map(...)   rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
+		"map", map->tag, \
+		G_STRFUNC, \
+		__VA_ARGS__)
+#define msg_info_map(...)   rspamd_default_log_function (G_LOG_LEVEL_INFO, \
+		"map", map->tag, \
+		G_STRFUNC, \
+		__VA_ARGS__)
+#define msg_debug_map(...)  rspamd_conditional_debug_fast (NULL, NULL, \
+		rspamd_map_log_id, "map", map->tag, \
+		G_STRFUNC, \
+		__VA_ARGS__)
+
+enum fetch_proto {
+	MAP_PROTO_FILE,
+	MAP_PROTO_HTTP,
+	MAP_PROTO_HTTPS,
+	MAP_PROTO_STATIC
+};
+
+/**
+ * Data specific to file maps
+ */
+struct file_map_data {
+	gchar *filename;
+	gboolean need_modify;
+	ev_stat st_ev;
+};
+
+
+struct http_map_data;
+
+struct rspamd_http_map_cached_cbdata {
+	ev_timer timeout;
+	struct ev_loop *event_loop;
+	struct rspamd_storage_shmem *shm;
+	struct rspamd_map *map;
+	struct http_map_data *data;
+	guint64 gen;
+	time_t last_checked;
+};
+
+struct rspamd_map_cachepoint {
+	gint available;
+	gsize len;
+	time_t last_modified;
+	gchar shmem_name[256];
+};
+
+/**
+ * Data specific to HTTP maps
+ */
+struct http_map_data {
+	/* Shared cache data */
+	struct rspamd_map_cachepoint *cache;
+	/* Non-shared for cache owner, used to cleanup cache */
+	struct rspamd_http_map_cached_cbdata *cur_cache_cbd;
+	gchar *userinfo;
+	gchar *path;
+	gchar *host;
+	gchar *rest;
+	rspamd_fstring_t *etag;
+	time_t last_modified;
+	time_t last_checked;
+	gboolean request_sent;
+	guint64 gen;
+	guint16 port;
+};
+
+struct static_map_data {
+	guchar *data;
+	gsize len;
+	gboolean processed;
+};
+
+union rspamd_map_backend_data {
+	struct file_map_data *fd;
+	struct http_map_data *hd;
+	struct static_map_data *sd;
+};
+
+struct rspamd_map_backend {
+	enum fetch_proto protocol;
+	gboolean is_signed;
+	gboolean is_compressed;
+	gboolean is_fallback;
+	struct ev_loop *event_loop;
+	guint32 id;
+	struct rspamd_cryptobox_pubkey *trusted_pubkey;
+	union rspamd_map_backend_data data;
+	gchar *uri;
+	ref_entry_t ref;
+};
+
+struct map_periodic_cbdata;
+
+struct rspamd_map {
+	struct rspamd_dns_resolver *r;
+	struct rspamd_config *cfg;
+	GPtrArray *backends;
+	struct rspamd_map_backend *fallback_backend;
+	map_cb_t read_callback;
+	map_fin_cb_t fin_callback;
+	map_dtor_t dtor;
+	void **user_data;
+	struct ev_loop *event_loop;
+	struct rspamd_worker *wrk;
+	gchar *description;
+	gchar *name;
+	guint32 id;
+	struct map_periodic_cbdata *scheduled_check;
+	rspamd_map_tmp_dtor tmp_dtor;
+	gpointer tmp_dtor_data;
+	rspamd_map_traverse_function traverse_function;
+	gpointer lua_map;
+	gsize nelts;
+	guint64 digest;
+	/* Should we check HTTP or just load cached data */
+	ev_tstamp timeout;
+	gdouble poll_timeout;
+	time_t next_check;
+	gboolean active_http;
+	gboolean non_trivial; /* E.g. has http backends in active mode */
+	gboolean file_only; /* No HTTP backends found */
+	gboolean static_only; /* No need to check */
+	/* Shared lock for temporary disabling of map reading (e.g. when this map is written by UI) */
+	gint *locked;
+	gchar tag[MEMPOOL_UID_LEN];
+};
+
+enum rspamd_map_http_stage {
+	http_map_resolve_host2 = 0, /* 2 requests sent */
+	http_map_resolve_host1, /* 1 request sent */
+	http_map_http_conn, /* http connection */
+	http_map_terminated /* terminated when doing resolving */
+};
+
+struct map_periodic_cbdata {
+	struct rspamd_map *map;
+	struct map_cb_data cbdata;
+	ev_timer ev;
+	gboolean need_modify;
+	gboolean errored;
+	gboolean locked;
+	guint cur_backend;
+	ref_entry_t ref;
+};
+
+static const gchar rspamd_http_file_magic[] =
+		{'r', 'm', 'c', 'd', '2', '0', '0', '0'};
+
+struct rspamd_http_file_data {
+	guchar magic[sizeof (rspamd_http_file_magic)];
+	goffset data_off;
+	gulong mtime;
+	gulong next_check;
+	gulong etag_len;
+};
+
+struct http_callback_data {
+	struct ev_loop *event_loop;
+	struct rspamd_http_connection *conn;
+	GPtrArray *addrs;
+	rspamd_inet_addr_t *addr;
+	struct rspamd_map *map;
+	struct rspamd_map_backend *bk;
+	struct http_map_data *data;
+	struct map_periodic_cbdata *periodic;
+	struct rspamd_cryptobox_pubkey *pk;
+	struct rspamd_storage_shmem *shmem_data;
+	gsize data_len;
+	gboolean check;
+	enum rspamd_map_http_stage stage;
+	ev_tstamp timeout;
+
+	ref_entry_t ref;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBUTIL_MAP_PRIVATE_H_ */
diff --git a/src/libserver/milter.c b/src/libserver/milter.c
index 3a12b08a9..26e1fd1cd 100644
--- a/src/libserver/milter.c
+++ b/src/libserver/milter.c
@@ -22,8 +22,8 @@
#include "unix-std.h"
#include "logger.h"
#include "ottery.h"
-#include "libutil/http_connection.h"
-#include "libutil/http_private.h"
+#include "libserver/http/http_connection.h"
+#include "libserver/http/http_private.h"
#include "libserver/protocol_internal.h"
#include "libserver/cfg_file_private.h"
#include "libmime/scan_result.h"
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index a700ad180..727ada37f 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -17,7 +17,7 @@
#include "rspamd.h"
#include "message.h"
#include "utlist.h"
-#include "http_private.h"
+#include "libserver/http/http_private.h"
#include "worker_private.h"
#include "libserver/cfg_file_private.h"
#include "libmime/scan_result_private.h"
diff --git a/src/libserver/protocol.h b/src/libserver/protocol.h
index 2ba10e926..460e70fd6 100644
--- a/src/libserver/protocol.h
+++ b/src/libserver/protocol.h
@@ -8,7 +8,7 @@

#include "config.h"
#include "scan_result.h"
-#include "http_connection.h"
+#include "libserver/http/http_connection.h"
#include "task.h"

#ifdef __cplusplus
diff --git a/src/libserver/rspamd_control.c b/src/libserver/rspamd_control.c
index e119e64d4..cb2efecb9 100644
--- a/src/libserver/rspamd_control.c
+++ b/src/libserver/rspamd_control.c
@@ -17,8 +17,8 @@
#include "rspamd.h"
#include "rspamd_control.h"
#include "worker_util.h"
-#include "libutil/http_connection.h"
-#include "libutil/http_private.h"
+#include "libserver/http/http_connection.h"
+#include "libserver/http/http_private.h"
#include "libutil/libev_helper.h"
#include "unix-std.h"
#include "utlist.h"
diff --git a/src/libserver/ssl_util.c b/src/libserver/ssl_util.c
new file mode 100644
index 000000000..bff4d5014
--- /dev/null
+++ b/src/libserver/ssl_util.c
@@ -0,0 +1,973 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "libutil/util.h"
+#include "libserver/logger.h"
+#include "ssl_util.h"
+#include "unix-std.h"
+
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#include <openssl/rand.h>
+#include <openssl/conf.h>
+#include <openssl/x509v3.h>
+
+/* State of the connection state machine driven by rspamd_ssl_event_handler */
+enum rspamd_ssl_state {
+	ssl_conn_reset = 0,
+	ssl_conn_init,
+	ssl_conn_connected,
+	ssl_next_read,
+	ssl_next_write,
+	ssl_next_shutdown,
+};
+
+enum rspamd_ssl_shutdown {
+	ssl_shut_default = 0,
+	ssl_shut_unclean,
+};
+
+struct rspamd_ssl_connection {
+	gint fd; /* private dup of the caller's descriptor, -1 until connected */
+	enum rspamd_ssl_state state;
+	enum rspamd_ssl_shutdown shut;
+	gboolean verify_peer;
+	SSL *ssl;
+	gchar *hostname;
+	struct rspamd_io_ev *ev;
+	struct rspamd_io_ev *shut_ev;
+	struct ev_loop *event_loop;
+	rspamd_ssl_handler_t handler;
+	rspamd_ssl_error_handler_t err_handler;
+	gpointer handler_data;
+	gchar log_tag[8];
+};
+
+/* Expands to conn->log_tag: a variable named `conn` must be in scope */
+#define msg_debug_ssl(...)  rspamd_conditional_debug_fast (NULL, NULL, \
+		rspamd_ssl_log_id, "ssl", conn->log_tag, \
+		G_STRFUNC, \
+		__VA_ARGS__)
+
+static void rspamd_ssl_event_handler (gint fd, short what, gpointer ud);
+
+INIT_LOG_MODULE(ssl)
+
+/* Error domain of every GError created in this file */
+static GQuark
+rspamd_ssl_quark (void)
+{
+	return g_quark_from_static_string ("rspamd-ssl");
+}
+
+#if (OPENSSL_VERSION_NUMBER >= 0x10100000L) && !defined(LIBRESSL_VERSION_NUMBER)
+#ifndef X509_get_notBefore
+#define X509_get_notBefore(x) X509_get0_notBefore(x)
+#endif
+#ifndef X509_get_notAfter
+#define X509_get_notAfter(x) X509_get0_notAfter(x)
+#endif
+#ifndef ASN1_STRING_data
+#define ASN1_STRING_data(x) ASN1_STRING_get0_data(x)
+#endif
+#endif
+
+/*	$OpenBSD: tls_verify.c,v 1.14 2015/09/29 10:17:04 deraadt Exp $ */
+/*
+ * Copyright (c) 2014 Jeremie Courreges-Anglas <jca@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Compares a name taken from a certificate with the expected host name,
+ * case-insensitively. A leading "*" in the certificate name replaces exactly
+ * the first label of `name`; see the comment below for rejected wildcards.
+ */
+static gboolean
+rspamd_tls_match_name (const char *cert_name, const char *name)
+{
+	const char *cert_domain, *domain, *next_dot;
+
+	if (g_ascii_strcasecmp (cert_name, name) == 0) {
+		return TRUE;
+	}
+
+	/* Wildcard match? */
+	if (cert_name[0] == '*') {
+		/*
+		 * Valid wildcards:
+		 * - "*.domain.tld"
+		 * - "*.sub.domain.tld"
+		 * - etc.
+		 * Reject "*.tld".
+		 * No attempt to prevent the use of eg. "*.co.uk".
+		 */
+		cert_domain = &cert_name[1];
+		/* Disallow "*"  */
+		if (cert_domain[0] == '\0') {
+			return FALSE;
+		}
+
+		/* Disallow "*foo" */
+		if (cert_domain[0] != '.') {
+			return FALSE;
+		}
+		/* Disallow "*.." */
+		if (cert_domain[1] == '.') {
+			return FALSE;
+		}
+		next_dot = strchr (&cert_domain[1], '.');
+		/* Disallow "*.bar" */
+		if (next_dot == NULL) {
+			return FALSE;
+		}
+		/* Disallow "*.bar.." */
+		if (next_dot[1] == '.') {
+			return FALSE;
+		}
+
+		domain = strchr (name, '.');
+
+		/* No wildcard match against a name with no host part. */
+		if (name[0] == '.') {
+			return FALSE;
+		}
+		/* No wildcard match against a name with no domain part. */
+		if (domain == NULL || strlen (domain) == 1) {
+			return FALSE;
+		}
+
+		if (g_ascii_strcasecmp (cert_domain, domain) == 0) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/* See RFC 5280 section 4.2.1.6 for SubjectAltName details. */
+/*
+ * Checks `name` against the subjectAltName entries of the certificate:
+ * iPAddress entries when `name` parses as an IPv4/IPv6 address, dNSName
+ * entries otherwise. Returns FALSE when the extension is absent.
+ */
+static gboolean
+rspamd_tls_check_subject_altname (X509 *cert, const char *name)
+{
+	STACK_OF(GENERAL_NAME) *altname_stack = NULL;
+	int addrlen, type;
+	int count, i;
+	union {
+		struct in_addr ip4;
+		struct in6_addr ip6;
+	} addrbuf;
+	gboolean ret = FALSE;
+
+	altname_stack = X509_get_ext_d2i (cert, NID_subject_alt_name, NULL, NULL);
+
+	if (altname_stack == NULL) {
+		return FALSE;
+	}
+
+	if (inet_pton (AF_INET, name, &addrbuf) == 1) {
+		type = GEN_IPADD;
+		addrlen = 4;
+	}
+	else if (inet_pton (AF_INET6, name, &addrbuf) == 1) {
+		type = GEN_IPADD;
+		addrlen = 16;
+	}
+	else {
+		type = GEN_DNS;
+		addrlen = 0;
+	}
+
+	count = sk_GENERAL_NAME_num (altname_stack);
+
+	for (i = 0; i < count; i++) {
+		GENERAL_NAME *altname;
+
+		altname = sk_GENERAL_NAME_value (altname_stack, i);
+
+		if (altname->type != type) {
+			continue;
+		}
+
+		if (type == GEN_DNS) {
+			const char *data;
+			int format, len;
+
+			format = ASN1_STRING_type (altname->d.dNSName);
+
+			if (format == V_ASN1_IA5STRING) {
+				data = (const char *)ASN1_STRING_data (altname->d.dNSName);
+				len = ASN1_STRING_length (altname->d.dNSName);
+
+				/* An embedded NUL byte makes the lengths disagree */
+				if (len < 0 || len != (gint)strlen (data)) {
+					ret = FALSE;
+					break;
+				}
+
+				/*
+				 * Per RFC 5280 section 4.2.1.6:
+				 * " " is a legal domain name, but that
+				 * dNSName must be rejected.
+				 */
+				if (strcmp (data, " ") == 0) {
+					ret = FALSE;
+					break;
+				}
+
+				if (rspamd_tls_match_name (data, name)) {
+					ret = TRUE;
+					break;
+				}
+			}
+		}
+		else if (type == GEN_IPADD) {
+			const char *data;
+			int datalen;
+
+			datalen = ASN1_STRING_length (altname->d.iPAddress);
+			data = (const char *)ASN1_STRING_data (altname->d.iPAddress);
+
+			if (datalen < 0) {
+				ret = FALSE;
+				break;
+			}
+
+			/*
+			 * Per RFC 5280 section 4.2.1.6:
+			 * IPv4 must use 4 octets and IPv6 must use 16 octets.
+			 */
+			if (datalen == addrlen && memcmp (data, &addrbuf, addrlen) == 0) {
+				ret = TRUE;
+				break;
+			}
+		}
+	}
+
+	sk_GENERAL_NAME_pop_free (altname_stack, GENERAL_NAME_free);
+	return ret;
+}
+
+/*
+ * Checks `name` against the commonName of the certificate subject. Names
+ * that parse as IP addresses are compared literally, others go through
+ * rspamd_tls_match_name.
+ */
+static gboolean
+rspamd_tls_check_common_name (X509 *cert, const char *name)
+{
+	X509_NAME *subject_name;
+	char *common_name = NULL;
+	union {
+		struct in_addr ip4;
+		struct in6_addr ip6;
+	} addrbuf;
+	int common_name_len;
+	gboolean ret = FALSE;
+
+	subject_name = X509_get_subject_name (cert);
+	if (subject_name == NULL) {
+		goto out;
+	}
+
+	common_name_len = X509_NAME_get_text_by_NID (subject_name, NID_commonName, NULL, 0);
+
+	if (common_name_len < 0) {
+		goto out;
+	}
+
+	common_name = g_malloc0 (common_name_len + 1);
+	X509_NAME_get_text_by_NID (subject_name, NID_commonName, common_name,
+			common_name_len + 1);
+
+	/* NUL bytes in CN? */
+	if (common_name_len != (gint)strlen (common_name)) {
+		goto out;
+	}
+
+	if (inet_pton (AF_INET, name, &addrbuf) == 1
+		|| inet_pton (AF_INET6, name, &addrbuf) == 1) {
+		/*
+		 * We don't want to attempt wildcard matching against IP
+		 * addresses, so perform a simple comparison here.
+		 */
+		if (strcmp (common_name, name) == 0) {
+			ret = TRUE;
+		}
+		else {
+			ret = FALSE;
+		}
+
+		goto out;
+	}
+
+	if (rspamd_tls_match_name (common_name, name)) {
+		ret = TRUE;
+	}
+
+out:
+	g_free (common_name);
+
+	return ret;
+}
+
+/* Accepts the certificate if either subjectAltName or commonName matches */
+static gboolean
+rspamd_tls_check_name (X509 *cert, const char *name)
+{
+	gboolean ret;
+
+	ret = rspamd_tls_check_subject_altname (cert, name);
+	if (ret) {
+		return ret;
+	}
+
+	return rspamd_tls_check_common_name (cert, name);
+}
+
+/*
+ * Verifies the peer after a finished handshake: OpenSSL's own verification
+ * result, presence of a certificate and, when a host name was supplied, that
+ * the certificate is issued for it. Every failure is reported through the
+ * connection's error handler before FALSE is returned.
+ */
+static gboolean
+rspamd_ssl_peer_verify (struct rspamd_ssl_connection *c)
+{
+	X509 *server_cert;
+	glong ver_err;
+	GError *err = NULL;
+
+	ver_err = SSL_get_verify_result (c->ssl);
+
+	if (ver_err != X509_V_OK) {
+		g_set_error (&err, rspamd_ssl_quark (), ver_err, "certificate validation "
+				"failed: %s", X509_verify_cert_error_string (ver_err));
+		c->err_handler (c->handler_data, err);
+		g_error_free (err);
+
+		return FALSE;
+	}
+
+	/* Get server's certificate */
+	server_cert =  SSL_get_peer_certificate (c->ssl);
+	if (server_cert == NULL) {
+		g_set_error (&err, rspamd_ssl_quark (), ver_err, "peer certificate is absent");
+		c->err_handler (c->handler_data, err);
+		g_error_free (err);
+
+		return FALSE;
+	}
+
+	if (c->hostname) {
+		if (!rspamd_tls_check_name (server_cert, c->hostname)) {
+			X509_free (server_cert);
+			g_set_error (&err, rspamd_ssl_quark (), ver_err, "peer certificate fails "
+					"hostname verification for %s", c->hostname);
+			c->err_handler (c->handler_data, err);
+			g_error_free (err);
+
+			return FALSE;
+		}
+	}
+
+	X509_free (server_cert);
+
+	return TRUE;
+}
+
+/*
+ * Builds a GError for a failed SSL operation. For SSL_ERROR_SYSCALL the text
+ * and the code come from errno; otherwise the OpenSSL error queue is drained
+ * into the message and the last queued code becomes the error code.
+ */
+static void
+rspamd_tls_set_error (gint retcode, const gchar *stage, GError **err)
+{
+	GString *reason;
+	gchar buf[120];
+	gint err_code = 0, last_err = 0;
+
+	reason = g_string_sized_new (sizeof (buf));
+
+	if (retcode == SSL_ERROR_SYSCALL) {
+		rspamd_printf_gstring (reason, "syscall fail: %s", strerror (errno));
+		err_code = errno;
+	}
+	else {
+		while ((err_code = ERR_get_error()) != 0) {
+			last_err = err_code;
+			ERR_error_string (err_code, buf);
+			rspamd_printf_gstring (reason, "ssl error: %s,", buf);
+		}
+
+		err_code = last_err;
+
+		/* Strip the separator left after the last queued error */
+		if (reason->len > 0 && reason->str[reason->len - 1] == ',') {
+			reason->str[reason->len - 1] = '\0';
+			reason->len --;
+		}
+	}
+
+	g_set_error (err, rspamd_ssl_quark (), err_code,
+			"ssl %s error: %s", stage, reason->str);
+	g_string_free (reason, TRUE);
+}
+
+/*
+ * Releases everything owned by the connection: the SSL object, the host name
+ * copy, the shutdown watcher and the private descriptor.
+ */
+static void
+rspamd_ssl_connection_dtor (struct rspamd_ssl_connection *conn)
+{
+	SSL_free (conn->ssl);
+
+	if (conn->hostname) {
+		g_free (conn->hostname);
+	}
+
+	if (conn->shut_ev) {
+		rspamd_ev_watcher_stop (conn->event_loop, conn->shut_ev);
+		g_free (conn->shut_ev);
+	}
+
+	/* fd stays -1 until rspamd_ssl_connect_fd has duplicated a descriptor */
+	if (conn->fd != -1) {
+		close (conn->fd);
+	}
+
+	g_free (conn);
+}
+
+/*
+ * Performs (or continues) the TLS shutdown and destroys the connection once
+ * it is finished or cannot be finished. When OpenSSL needs more I/O, a
+ * dedicated watcher with a timeout is armed and the function returns with
+ * the connection in the ssl_next_shutdown state.
+ */
+static void
+rspamd_ssl_shutdown (struct rspamd_ssl_connection *conn)
+{
+	gint ret = 0, nret, retries;
+	static const gint max_retries = 5;
+
+	/*
+	 * The OpenSSL manual describes a zero return of SSL_shutdown() as: "The
+	 * shutdown is not yet finished. Call SSL_shutdown() for a second time,
+	 * if a bidirectional shutdown shall be performed. The output of
+	 * SSL_get_error(3) may be misleading, as an erroneous SSL_ERROR_SYSCALL
+	 * may be flagged even though no error occurred."
+	 *
+	 * It does not say what to do when the second call returns zero as well,
+	 * so the call is repeated a bounded number of times.
+	 */
+	for (retries = 0; retries < max_retries; retries ++) {
+		ret = SSL_shutdown (conn->ssl);
+
+		if (ret != 0) {
+			break;
+		}
+	}
+
+	if (ret == 1) {
+		/* All done */
+		msg_debug_ssl ("ssl shutdown: all done");
+		rspamd_ssl_connection_dtor (conn);
+	}
+	else if (ret < 0) {
+		short what;
+
+		nret = SSL_get_error (conn->ssl, ret);
+		conn->state = ssl_next_shutdown;
+
+		if (nret == SSL_ERROR_WANT_READ) {
+			msg_debug_ssl ("ssl shutdown: need read");
+			what = EV_READ;
+		}
+		else if (nret == SSL_ERROR_WANT_WRITE) {
+			msg_debug_ssl ("ssl shutdown: need write");
+			what = EV_WRITE;
+		}
+		else {
+			/* Cannot do anything else, fatal error */
+			GError *err = NULL;
+
+			rspamd_tls_set_error (nret, "final shutdown", &err);
+			msg_debug_ssl ("ssl shutdown: fatal error: %e; retries=%d; ret=%d",
+					err, retries, ret);
+			g_error_free (err);
+			rspamd_ssl_connection_dtor (conn);
+
+			return;
+		}
+
+		/* As we own fd, we can try to perform shutdown one more time */
+		/* BUGON: but we DO NOT own conn->ev, and it's a big issue */
+		static const ev_tstamp shutdown_time = 5.0;
+
+		if (conn->shut_ev == NULL) {
+			rspamd_ev_watcher_stop (conn->event_loop, conn->ev);
+			conn->shut_ev = g_malloc0 (sizeof (*conn->shut_ev));
+			rspamd_ev_watcher_init (conn->shut_ev, conn->fd, what,
+					rspamd_ssl_event_handler, conn);
+			rspamd_ev_watcher_start (conn->event_loop, conn->shut_ev, shutdown_time);
+			/* XXX: can it be done safely ? */
+			conn->ev = conn->shut_ev;
+		}
+		else {
+			rspamd_ev_watcher_reschedule (conn->event_loop, conn->shut_ev, what);
+		}
+	}
+	else if (ret == 0) {
+		/* What can we do here?? */
+		msg_debug_ssl ("ssl shutdown: openssl failed to initiate shutdown after "
+				"%d attempts!", max_retries);
+		rspamd_ssl_connection_dtor (conn);
+	}
+}
+
+/*
+ * Watcher callback of the connection: handles timeouts, drives the handshake
+ * while in ssl_conn_init, forwards I/O events to the user handler once
+ * connected and continues a pending shutdown.
+ */
+static void
+rspamd_ssl_event_handler (gint fd, short what, gpointer ud)
+{
+	struct rspamd_ssl_connection *conn = ud;
+	gint ret;
+	GError *err = NULL;
+
+	if (what == EV_TIMER) {
+		if (conn->state == ssl_next_shutdown) {
+			/* No way to restore, just terminate */
+			rspamd_ssl_connection_dtor (conn);
+		}
+		else {
+			conn->shut = ssl_shut_unclean;
+			rspamd_ev_watcher_stop (conn->event_loop, conn->ev);
+			g_set_error (&err, rspamd_ssl_quark (), ETIMEDOUT,
+					"ssl connection timed out");
+			conn->err_handler (conn->handler_data, err);
+			g_error_free (err);
+		}
+
+		return;
+	}
+
+	msg_debug_ssl ("ssl event; what=%d; c->state=%d", (int)what,
+			(int)conn->state);
+
+	switch (conn->state) {
+	case ssl_conn_init:
+		/* Continue connection */
+		ret = SSL_connect (conn->ssl);
+
+		if (ret == 1) {
+			rspamd_ev_watcher_stop (conn->event_loop, conn->ev);
+			/* Verify certificate */
+			if ((!conn->verify_peer) || rspamd_ssl_peer_verify (conn)) {
+				msg_debug_ssl ("ssl connect: connected");
+				conn->state = ssl_conn_connected;
+				conn->handler (fd, EV_WRITE, conn->handler_data);
+			}
+			else {
+				/* rspamd_ssl_peer_verify has already called err_handler */
+				return;
+			}
+		}
+		else {
+			ret = SSL_get_error (conn->ssl, ret);
+
+			if (ret == SSL_ERROR_WANT_READ) {
+				msg_debug_ssl ("ssl connect: need read");
+				what = EV_READ;
+			}
+			else if (ret == SSL_ERROR_WANT_WRITE) {
+				msg_debug_ssl ("ssl connect: need write");
+				what = EV_WRITE;
+			}
+			else {
+				rspamd_ev_watcher_stop (conn->event_loop, conn->ev);
+				rspamd_tls_set_error (ret, "connect", &err);
+				conn->err_handler (conn->handler_data, err);
+				g_error_free (err);
+				return;
+			}
+
+			rspamd_ev_watcher_reschedule (conn->event_loop, conn->ev, what);
+
+		}
+		break;
+	case ssl_next_read:
+		rspamd_ev_watcher_reschedule (conn->event_loop, conn->ev, EV_READ);
+		conn->state = ssl_conn_connected;
+		conn->handler (fd, EV_READ, conn->handler_data);
+		break;
+	case ssl_next_write:
+		rspamd_ev_watcher_reschedule (conn->event_loop, conn->ev, EV_WRITE);
+		conn->state = ssl_conn_connected;
+		conn->handler (fd, EV_WRITE, conn->handler_data);
+		break;
+	case ssl_conn_connected:
+		rspamd_ev_watcher_reschedule (conn->event_loop, conn->ev, what);
+		conn->state = ssl_conn_connected;
+		conn->handler (fd, what, conn->handler_data);
+		break;
+	case ssl_next_shutdown:
+		rspamd_ssl_shutdown (conn);
+		break;
+	default:
+		rspamd_ev_watcher_stop (conn->event_loop, conn->ev);
+		g_set_error (&err, rspamd_ssl_quark (), EINVAL,
+				"ssl bad state error: %d", conn->state);
+		conn->err_handler (conn->handler_data, err);
+		g_error_free (err);
+		break;
+	}
+}
+
+/**
+ * Creates a new, not yet connected SSL connection object.
+ * @param ssl_ctx SSL context passed to SSL_new, must not be NULL
+ * @param ev_base event loop used for the connection watchers
+ * @param verify_peer whether the peer certificate is verified after connect
+ * @param log_tag tag used in debug logs; when NULL a random hex tag is made
+ * @return new connection in the ssl_conn_reset state
+ */
+struct rspamd_ssl_connection *
+rspamd_ssl_connection_new (gpointer ssl_ctx, struct ev_loop *ev_base,
+		gboolean verify_peer, const gchar *log_tag)
+{
+	struct rspamd_ssl_connection *c;
+
+	g_assert (ssl_ctx != NULL);
+	c = g_malloc0 (sizeof (*c));
+	c->ssl = SSL_new (ssl_ctx);
+	c->event_loop = ev_base;
+	c->verify_peer = verify_peer;
+	/* No descriptor yet: zero would be mistaken for a real one in the dtor */
+	c->fd = -1;
+
+	/*
+	 * Size by the destination buffer: `log_tag` is a pointer, so its sizeof
+	 * is unrelated to the capacity of c->log_tag.
+	 */
+	if (log_tag) {
+		rspamd_strlcpy (c->log_tag, log_tag, sizeof (c->log_tag));
+	}
+	else {
+		rspamd_random_hex (c->log_tag, sizeof (c->log_tag) - 1);
+		c->log_tag[sizeof (c->log_tag) - 1] = '\0';
+	}
+
+	return c;
+}
+
+
+/**
+ * Starts the TLS handshake over an already connected descriptor. The
+ * descriptor is duplicated, so the connection closes only its own copy.
+ * @param conn connection in the ssl_conn_reset state
+ * @param fd connected socket
+ * @param hostname optional peer name, copied; used for SNI (when available)
+ * and for certificate name verification
+ * @param ev watcher that is (re)initialised to drive this connection
+ * @param timeout watcher timeout
+ * @param handler called on I/O readiness once connected
+ * @param err_handler called on errors
+ * @param handler_data opaque pointer passed to both handlers
+ * @return FALSE if the connection is in a wrong state, the descriptor cannot
+ * be duplicated or attached, or the handshake fails immediately
+ */
+gboolean
+rspamd_ssl_connect_fd (struct rspamd_ssl_connection *conn, gint fd,
+		const gchar *hostname, struct rspamd_io_ev *ev, ev_tstamp timeout,
+		rspamd_ssl_handler_t handler, rspamd_ssl_error_handler_t err_handler,
+		gpointer handler_data)
+{
+	gint ret;
+
+	g_assert (conn != NULL);
+
+	if (conn->state != ssl_conn_reset) {
+		return FALSE;
+	}
+
+	/* We dup fd to allow graceful closing */
+	gint nfd = dup (fd);
+
+	if (nfd == -1) {
+		return FALSE;
+	}
+
+	conn->fd = nfd;
+	conn->ev = ev;
+	conn->handler = handler;
+	conn->err_handler = err_handler;
+	conn->handler_data = handler_data;
+
+	if (SSL_set_fd (conn->ssl, conn->fd) != 1) {
+		close (conn->fd);
+		/* Forget the descriptor so that the dtor does not close it again */
+		conn->fd = -1;
+
+		return FALSE;
+	}
+
+	if (hostname) {
+		conn->hostname = g_strdup (hostname);
+#ifdef HAVE_SSL_TLSEXT_HOSTNAME
+		SSL_set_tlsext_host_name (conn->ssl, conn->hostname);
+#endif
+	}
+
+	conn->state = ssl_conn_init;
+
+	ret = SSL_connect (conn->ssl);
+
+	if (ret == 1) {
+		conn->state = ssl_conn_connected;
+
+		msg_debug_ssl ("connected, start write event");
+		rspamd_ev_watcher_stop (conn->event_loop, ev);
+		rspamd_ev_watcher_init (ev, nfd, EV_WRITE, rspamd_ssl_event_handler, conn);
+		rspamd_ev_watcher_start (conn->event_loop, ev, timeout);
+	}
+	else {
+		ret = SSL_get_error (conn->ssl, ret);
+
+		if (ret == SSL_ERROR_WANT_READ) {
+			msg_debug_ssl ("not connected, want read");
+		}
+		else if (ret == SSL_ERROR_WANT_WRITE) {
+			msg_debug_ssl ("not connected, want write");
+		}
+		else {
+			GError *err = NULL;
+
+			conn->shut = ssl_shut_unclean;
+			rspamd_tls_set_error (ret, "initial connect", &err);
+			msg_debug_ssl ("not connected, fatal error %e", err);
+			g_error_free (err);
+
+
+			return FALSE;
+		}
+
+		/* Handshake in progress: wait for either direction */
+		rspamd_ev_watcher_stop (conn->event_loop, ev);
+		rspamd_ev_watcher_init (ev, nfd, EV_WRITE|EV_READ,
+				rspamd_ssl_event_handler, conn);
+		rspamd_ev_watcher_start (conn->event_loop, ev, timeout);
+	}
+
+	return TRUE;
+}
+
+gssize
+rspamd_ssl_read (struct rspamd_ssl_connection *conn, gpointer buf,
+		gsize buflen)
+{
+	gint ret;
+	short what;
+	GError *err = NULL;
+
+	g_assert (conn != NULL);
+
+	if (conn->state != ssl_conn_connected && conn->state != ssl_next_read) {
+		errno = EINVAL;
+		g_set_error (&err, rspamd_ssl_quark (), ECONNRESET,
+				"ssl state error: cannot read data");
+		conn->shut = ssl_shut_unclean;
+		conn->err_handler (conn->handler_data, err);
+		g_error_free (err);
+
+		return -1;
+	}
+
+	ret = SSL_read (conn->ssl, buf, buflen);
+	msg_debug_ssl ("ssl read: %d", ret);
+
+	if (ret > 0) {
+		conn->state = ssl_conn_connected;
+		return ret;
+	}
+	else if (ret == 0) {
+		ret = SSL_get_error (conn->ssl, ret);
+
+		if (ret == SSL_ERROR_ZERO_RETURN || ret == SSL_ERROR_SYSCALL) {
+			conn->state = ssl_conn_reset;
+			return 0;
+		}
+		else {
+			conn->shut = ssl_shut_unclean;
+
rspamd_tls_set_error (ret, "read", &err); + conn->err_handler (conn->handler_data, err); + g_error_free (err); + errno = EINVAL; + + return -1; + } + } + else { + ret = SSL_get_error (conn->ssl, ret); + conn->state = ssl_next_read; + what = 0; + + if (ret == SSL_ERROR_WANT_READ) { + msg_debug_ssl ("ssl read: need read"); + what |= EV_READ; + } + else if (ret == SSL_ERROR_WANT_WRITE) { + msg_debug_ssl ("ssl read: need write"); + what |= EV_WRITE; + } + else { + conn->shut = ssl_shut_unclean; + rspamd_tls_set_error (ret, "read", &err); + conn->err_handler (conn->handler_data, err); + g_error_free (err); + errno = EINVAL; + + return -1; + } + + rspamd_ev_watcher_reschedule (conn->event_loop, conn->ev, what); + errno = EAGAIN; + } + + return -1; +} + +gssize +rspamd_ssl_write (struct rspamd_ssl_connection *conn, gconstpointer buf, + gsize buflen) +{ + gint ret; + short what; + GError *err = NULL; + + g_assert (conn != NULL); + + if (conn->state != ssl_conn_connected && conn->state != ssl_next_write) { + errno = EINVAL; + return -1; + } + + ret = SSL_write (conn->ssl, buf, buflen); + msg_debug_ssl ("ssl write: ret=%d, buflen=%z", ret, buflen); + + if (ret > 0) { + conn->state = ssl_conn_connected; + return ret; + } + else if (ret == 0) { + ret = SSL_get_error (conn->ssl, ret); + + if (ret == SSL_ERROR_ZERO_RETURN) { + rspamd_tls_set_error (ret, "write", &err); + conn->err_handler (conn->handler_data, err); + g_error_free (err); + errno = ECONNRESET; + conn->state = ssl_conn_reset; + + return -1; + } + else { + conn->shut = ssl_shut_unclean; + rspamd_tls_set_error (ret, "write", &err); + conn->err_handler (conn->handler_data, err); + g_error_free (err); + errno = EINVAL; + + return -1; + } + } + else { + ret = SSL_get_error (conn->ssl, ret); + conn->state = ssl_next_write; + + if (ret == SSL_ERROR_WANT_READ) { + msg_debug_ssl ("ssl write: need read"); + what = EV_READ; + } + else if (ret == SSL_ERROR_WANT_WRITE) { + msg_debug_ssl ("ssl write: need write"); + what = 
EV_WRITE; + } + else { + conn->shut = ssl_shut_unclean; + rspamd_tls_set_error (ret, "write", &err); + conn->err_handler (conn->handler_data, err); + g_error_free (err); + errno = EINVAL; + + return -1; + } + + rspamd_ev_watcher_reschedule (conn->event_loop, conn->ev, what); + errno = EAGAIN; + } + + return -1; +} + +gssize +rspamd_ssl_writev (struct rspamd_ssl_connection *conn, struct iovec *iov, + gsize iovlen) +{ + /* + * Static is needed to avoid issue: + * https://github.com/openssl/openssl/issues/6865 + */ + static guchar ssl_buf[16384]; + guchar *p; + struct iovec *cur; + gsize i, remain; + + remain = sizeof (ssl_buf); + p = ssl_buf; + + for (i = 0; i < iovlen; i ++) { + cur = &iov[i]; + + if (cur->iov_len > 0) { + if (remain >= cur->iov_len) { + memcpy (p, cur->iov_base, cur->iov_len); + p += cur->iov_len; + remain -= cur->iov_len; + } + else { + memcpy (p, cur->iov_base, remain); + p += remain; + remain = 0; + break; + } + } + } + + return rspamd_ssl_write (conn, ssl_buf, p - ssl_buf); +} + +/** + * Removes connection data + * @param conn + */ +void +rspamd_ssl_connection_free (struct rspamd_ssl_connection *conn) +{ + if (conn) { + if (conn->shut == ssl_shut_unclean) { + /* Ignore return result and close socket */ + msg_debug_ssl ("unclean shutdown"); + SSL_set_quiet_shutdown (conn->ssl, 1); + (void)SSL_shutdown (conn->ssl); + rspamd_ssl_connection_dtor (conn); + } + else { + msg_debug_ssl ("normal shutdown"); + rspamd_ssl_shutdown (conn); + } + } +} + +gpointer +rspamd_init_ssl_ctx (void) +{ + SSL_CTX *ssl_ctx; + gint ssl_options; + + rspamd_openssl_maybe_init (); + + ssl_ctx = SSL_CTX_new (SSLv23_method ()); + SSL_CTX_set_verify (ssl_ctx, SSL_VERIFY_PEER, NULL); + SSL_CTX_set_verify_depth (ssl_ctx, 4); + ssl_options = SSL_OP_NO_SSLv2|SSL_OP_NO_SSLv3; + +#ifdef SSL_OP_NO_COMPRESSION + ssl_options |= SSL_OP_NO_COMPRESSION; +#elif OPENSSL_VERSION_NUMBER >= 0x00908000L + sk_SSL_COMP_zero (SSL_COMP_get_compression_methods ()); +#endif + + SSL_CTX_set_options 
(ssl_ctx, ssl_options); + + return ssl_ctx; +} + +gpointer rspamd_init_ssl_ctx_noverify (void) +{ + SSL_CTX *ssl_ctx_noverify; + gint ssl_options; + + rspamd_openssl_maybe_init (); + + ssl_options = SSL_OP_NO_SSLv2|SSL_OP_NO_SSLv3; + +#ifdef SSL_OP_NO_COMPRESSION + ssl_options |= SSL_OP_NO_COMPRESSION; +#elif OPENSSL_VERSION_NUMBER >= 0x00908000L + sk_SSL_COMP_zero (SSL_COMP_get_compression_methods ()); +#endif + + ssl_ctx_noverify = SSL_CTX_new (SSLv23_method ()); + SSL_CTX_set_verify (ssl_ctx_noverify, SSL_VERIFY_NONE, NULL); + SSL_CTX_set_options (ssl_ctx_noverify, ssl_options); +#ifdef SSL_SESS_CACHE_BOTH + SSL_CTX_set_session_cache_mode (ssl_ctx_noverify, SSL_SESS_CACHE_BOTH); +#endif + + return ssl_ctx_noverify; +} diff --git a/src/libserver/ssl_util.h b/src/libserver/ssl_util.h new file mode 100644 index 000000000..708c07930 --- /dev/null +++ b/src/libserver/ssl_util.h @@ -0,0 +1,104 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef SRC_LIBUTIL_SSL_UTIL_H_ +#define SRC_LIBUTIL_SSL_UTIL_H_ + +#include "config.h" +#include "libutil/addr.h" +#include "libutil/libev_helper.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_ssl_connection; + +typedef void (*rspamd_ssl_handler_t) (gint fd, short what, gpointer d); + +typedef void (*rspamd_ssl_error_handler_t) (gpointer d, GError *err); + +/** + * Creates a new ssl connection data structure + * @param ssl_ctx initialized SSL_CTX structure + * @param ev_base event loop used by the connection + * @param verify_peer whether the peer is verified after the handshake + * @param log_tag tag used for logging; a random one is generated if NULL + * @return opaque connection data + */ +struct rspamd_ssl_connection *rspamd_ssl_connection_new (gpointer ssl_ctx, + struct ev_loop *ev_base, + gboolean verify_peer, + const gchar *log_tag); + +/** + * Connects SSL session using the specified (connected) FD + * @param conn connection + * @param fd fd to use + * @param hostname hostname for SNI + * @param ev event to use + * @param timeout timeout for connection + * @param handler connected session handler + * @param err_handler handler called on errors + * @param handler_data opaque data passed to both handlers + * @return TRUE if the handshake has completed or is in progress, FALSE on failure + */ +gboolean rspamd_ssl_connect_fd (struct rspamd_ssl_connection *conn, gint fd, + const gchar *hostname, struct rspamd_io_ev *ev, ev_tstamp timeout, + rspamd_ssl_handler_t handler, rspamd_ssl_error_handler_t err_handler, + gpointer handler_data); + +/** + * Perform async read from SSL socket + * @param conn connection + * @param buf buffer to read into + * @param buflen size of the buffer + * @return number of bytes read, 0 if the connection has been closed, or -1 with errno set (EAGAIN when the read must be retried) + */ +gssize rspamd_ssl_read (struct rspamd_ssl_connection *conn, gpointer buf, + gsize buflen); + +/** + * Perform async write to ssl buffer + * @param conn connection + * @param buf data to write + * @param buflen length of the data + * @return number of bytes written or -1 with errno set (EAGAIN when the write must be retried) + */ +gssize rspamd_ssl_write (struct rspamd_ssl_connection *conn, gconstpointer buf, + gsize buflen); + +/** + * Emulate writev by copying iovec to a temporary buffer + * @param conn connection + * @param iov vector of buffers to write + * @param iovlen number of elements in the vector + * @return same as rspamd_ssl_write; at most 16384 bytes are sent per call + */ +gssize rspamd_ssl_writev (struct rspamd_ssl_connection *conn, struct iovec *iov, + gsize iovlen); + +/** + * Removes connection data + * @param conn + 
*/ +void rspamd_ssl_connection_free (struct rspamd_ssl_connection *conn); + +gpointer rspamd_init_ssl_ctx (void); +gpointer rspamd_init_ssl_ctx_noverify (void); + +#ifdef __cplusplus +} +#endif + +#endif /* SRC_LIBUTIL_SSL_UTIL_H_ */ diff --git a/src/libserver/task.h b/src/libserver/task.h index 50e07b23f..778b77dbf 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -17,7 +17,7 @@ #define TASK_H_ #include "config.h" -#include "http_connection.h" +#include "libserver/http/http_connection.h" #include "async_session.h" #include "util.h" #include "mem_pool.h" diff --git a/src/libserver/worker_util.c b/src/libserver/worker_util.c index ebc6a1980..ceb2f1103 100644 --- a/src/libserver/worker_util.c +++ b/src/libserver/worker_util.c @@ -21,15 +21,12 @@ #include "utlist.h" #include "ottery.h" #include "rspamd_control.h" -#include "libutil/map.h" -#include "libutil/map_private.h" -#include "libutil/http_private.h" -#include "libutil/http_router.h" +#include "libserver/maps/map.h" +#include "libserver/maps/map_private.h" +#include "libserver/http/http_private.h" +#include "libserver/http/http_router.h" #include "libutil/rrd.h" -#ifdef WITH_GPERF_TOOLS -#include <gperftools/profiler.h> -#endif /* sys/resource.h */ #ifdef HAVE_SYS_RESOURCE_H #include <sys/resource.h> @@ -218,9 +215,6 @@ rspamd_worker_on_delayed_shutdown (EV_P_ ev_timer *w, int revents) worker->state = rspamd_worker_wanna_die; ev_timer_stop (EV_A_ w); ev_break (loop, EVBREAK_ALL); -#ifdef WITH_GPERF_TOOLS - ProfilerStop (); -#endif } static void @@ -481,13 +475,6 @@ rspamd_prepare_worker (struct rspamd_worker *worker, const char *name, struct rspamd_worker_listen_socket *ls; struct rspamd_worker_accept_event *accept_ev; -#ifdef WITH_PROFILER - extern void _start (void), etext (void); - monstartup ((u_long) & _start, (u_long) & etext); -#endif - - gperf_profiler_init (worker->srv->cfg, name); - worker->signal_events = g_hash_table_new_full (g_direct_hash, g_direct_equal, NULL, rspamd_sigh_free); diff 
--git a/src/libserver/worker_util.h b/src/libserver/worker_util.h index 298243961..0e9e60545 100644 --- a/src/libserver/worker_util.h +++ b/src/libserver/worker_util.h @@ -18,7 +18,7 @@ #include "config.h" #include "util.h" -#include "http_connection.h" +#include "libserver/http/http_connection.h" #include "rspamd.h" #ifdef __cplusplus |