From 2672f82d536eeed406c6c9882d972ac24e458cf3 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 10 Jun 2016 14:17:13 +0100 Subject: [PATCH] [Rework] Rework HTTP code - Restructure body storage - Make abstract API to manage body of HTTP messages - Implement messages with body in shared memory - Avoid copying when serving files - Allow to share content between messages --- src/libutil/http.c | 347 ++++++++++++++++++++++++++++++++++++--------- src/libutil/http.h | 69 ++++++++- 2 files changed, 346 insertions(+), 70 deletions(-) diff --git a/src/libutil/http.c b/src/libutil/http.c index fc8263ddd..84faaad89 100644 --- a/src/libutil/http.c +++ b/src/libutil/http.c @@ -592,8 +592,10 @@ rspamd_http_on_headers_complete (http_parser * parser) struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; struct rspamd_http_connection_private *priv; + struct rspamd_http_message *msg; priv = conn->priv; + msg = priv->msg; if (priv->header != NULL) { rspamd_http_finish_header (conn, priv); @@ -602,20 +604,17 @@ rspamd_http_on_headers_complete (http_parser * parser) priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_NEW_HEADER; } - if (parser->content_length != 0 && parser->content_length != ULLONG_MAX) { - priv->msg->body = rspamd_fstring_sized_new (parser->content_length); - } - else { - priv->msg->body = rspamd_fstring_new (); + if (!rspamd_http_message_set_body (msg, NULL, parser->content_length)) { + return -1; } if (parser->flags & F_SPAMC) { - priv->msg->flags |= RSPAMD_HTTP_FLAG_SPAMC; + msg->flags |= RSPAMD_HTTP_FLAG_SPAMC; } - priv->msg->body_buf.begin = priv->msg->body->str; - priv->msg->method = parser->method; - priv->msg->code = parser->status_code; + + msg->method = parser->method; + msg->code = parser->status_code; return 0; } @@ -626,18 +625,18 @@ rspamd_http_on_body (http_parser * parser, const gchar *at, size_t length) struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; struct rspamd_http_connection_private *priv; + struct rspamd_http_message *msg; priv = conn->priv; + msg = priv->msg; - priv->msg->body = rspamd_fstring_append (priv->msg->body, at, length); - - /* Append might cause realloc */ - priv->msg->body_buf.begin = priv->msg->body->str; - priv->msg->body_buf.len = priv->msg->body->len; + if (!rspamd_http_message_append_body (msg, at, length)) { + return -1; + } if ((conn->opts & RSPAMD_HTTP_BODY_PARTIAL) && !IS_CONN_ENCRYPTED (priv)) { /* Incremental update is impossible for encrypted requests so far */ - return (conn->body_handler (conn, priv->msg, at, length)); + return (conn->body_handler (conn, msg, at, length)); } return 0; @@ -710,10 +709,10 @@ rspamd_http_decrypt_message (struct rspamd_http_connection *conn, enum rspamd_cryptobox_mode mode; mode = rspamd_keypair_alg (priv->local_key); - nonce = msg->body->str; - m = msg->body->str + rspamd_cryptobox_nonce_bytes (mode) + + nonce = msg->body_buf.str; + m = msg->body_buf.str + rspamd_cryptobox_nonce_bytes (mode) + rspamd_cryptobox_mac_bytes (mode); - dec_len = msg->body->len - rspamd_cryptobox_nonce_bytes (mode) - + dec_len = msg->body_buf.len - rspamd_cryptobox_nonce_bytes (mode) - rspamd_cryptobox_mac_bytes (mode); if ((nm = rspamd_pubkey_get_nm (peer_key)) == NULL) { @@ -781,7 +780,7 @@ rspamd_http_on_message_complete (http_parser * parser) mode = rspamd_keypair_alg (priv->local_key); if (priv->local_key == NULL || priv->msg->peer_key == NULL || - priv->msg->body->len < rspamd_cryptobox_nonce_bytes (mode) + + priv->msg->body_buf.len < rspamd_cryptobox_nonce_bytes (mode) + rspamd_cryptobox_mac_bytes (mode)) { msg_err ("cannot decrypt message"); return -1; @@ -1172,13 +1171,6 @@ rspamd_http_connection_steal_msg (struct rspamd_http_connection *conn) msg->peer_key = NULL; } priv->msg = NULL; - - /* We also might need to adjust body/body_buf */ - if (msg->body_buf.begin > msg->body->str) { - memmove (msg->body->str, msg->body_buf.begin, msg->body_buf.len); - msg->body->len = msg->body_buf.len; - msg->body_buf.begin = msg->body->str; - } } return msg; @@ -1190,17 +1182,60 @@ rspamd_http_connection_copy_msg (struct rspamd_http_connection *conn) struct rspamd_http_connection_private *priv; struct rspamd_http_message *new_msg, *msg; struct rspamd_http_header *hdr, *nhdr; + const gchar *old_body; + gsize old_len; + struct stat st; + union _rspamd_storage_u *storage; priv = conn->priv; msg = priv->msg; new_msg = rspamd_http_new_message (msg->type); + new_msg->flags = msg->flags; - if (msg->body) { - new_msg->body = rspamd_fstring_new_init (msg->body->str, - msg->body->len); - new_msg->body_buf.begin = msg->body_buf.begin; - new_msg->body_buf.len = msg->body_buf.len; + if (msg->body_buf.len > 0) { + + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + /* Avoid copying by just maping a shared segment */ + new_msg->flags |= RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE; + + storage = &new_msg->body_buf.c; + storage->shared.shm_fd = dup (msg->body_buf.c.shared.shm_fd); + + if (storage->shared.shm_fd == -1) { + rspamd_http_message_free (new_msg); + return NULL; + } + + if (fstat (storage->shared.shm_fd, &st) == -1) { + rspamd_http_message_free (new_msg); + return NULL; + } + + /* We don't own segment, so do not try to touch it */ + storage->shared.shm_name = NULL; + new_msg->body_buf.str = mmap (NULL, st.st_size, + PROT_READ, MAP_SHARED, + storage->shared.shm_fd, 0); + + if (new_msg->body_buf.str == MAP_FAILED) { + rspamd_http_message_free (new_msg); + return NULL; + } + + new_msg->body_buf.begin = new_msg->body_buf.str; + new_msg->body_buf.len = st.st_size; + new_msg->body_buf.begin = new_msg->body_buf.str + + (msg->body_buf.begin - msg->body_buf.str); + } + else { + old_body = rspamd_http_message_get_body (msg, &old_len); + + if (!rspamd_http_message_set_body (new_msg, old_body, old_len)) { + rspamd_http_message_free (new_msg); + return NULL; + } + } } if (msg->url) { @@ -1222,7 +1257,6 @@ rspamd_http_connection_copy_msg (struct rspamd_http_connection *conn) new_msg->method = msg->method; new_msg->port = msg->port; new_msg->date = msg->date; - new_msg->flags = msg->flags; new_msg->last_modified = msg->last_modified; LL_FOREACH (msg->headers, hdr) { @@ -1469,14 +1503,14 @@ rspamd_http_connection_write_message (struct rspamd_http_connection *conn, if (encrypted) { mode = rspamd_keypair_alg (priv->local_key); - if (msg->body == NULL || msg->body->len == 0) { + if (msg->body_buf.len == 0) { pbody = NULL; bodylen = 0; msg->method = HTTP_GET; } else { - pbody = msg->body->str; - bodylen = msg->body->len; + pbody = (gchar *)msg->body_buf.begin; + bodylen = msg->body_buf.len; msg->method = HTTP_POST; } @@ -1534,22 +1568,22 @@ rspamd_http_connection_write_message (struct rspamd_http_connection *conn, } else { if (msg->method < HTTP_SYMBOLS) { - if (msg->body == NULL || msg->body->len == 0) { + if (msg->body_buf.len == 0) { pbody = NULL; bodylen = 0; priv->outlen = 2; msg->method = HTTP_GET; } else { - pbody = msg->body->str; - bodylen = msg->body->len; + pbody = (gchar *)msg->body_buf.begin; + bodylen = msg->body_buf.len; priv->outlen = 3; msg->method = HTTP_POST; } } - else if (msg->body != NULL) { - pbody = msg->body->str; - bodylen = msg->body->len; + else if (msg->body_buf.len > 0) { + pbody = (gchar *)msg->body_buf.begin; + bodylen = msg->body_buf.len; priv->outlen = 2; } else { @@ -1796,11 +1830,6 @@ rspamd_http_connection_write_message (struct rspamd_http_connection *conn, } if (pbody != NULL) { - - if (msg->body_buf.begin == NULL && msg->body_buf.len == 0) { - msg->body_buf.begin = msg->body->str; - } - priv->out[i].iov_base = pbody; priv->out[i++].iov_len = bodylen; } @@ -1825,7 +1854,8 @@ rspamd_http_new_message (enum http_parser_type type) { struct rspamd_http_message *new; - new = g_slice_alloc (sizeof (struct rspamd_http_message)); + new = g_slice_alloc0 (sizeof (struct rspamd_http_message)); + if (type == HTTP_REQUEST) { new->url = rspamd_fstring_new (); } @@ -1834,17 +1864,9 @@ rspamd_http_new_message (enum http_parser_type type) new->code = 200; } - new->headers = NULL; - new->date = 0; - new->body = NULL; - memset (&new->body_buf, 0, sizeof (new->body_buf)); - new->status = NULL; - new->host = NULL; new->port = 80; new->type = type; new->method = HTTP_GET; - new->peer_key = NULL; - new->flags = 0; return new; } @@ -1898,10 +1920,186 @@ rspamd_http_message_from_url (const gchar *url) return msg; } +const gchar * +rspamd_http_message_get_body (struct rspamd_http_message *msg, + gsize *blen) +{ + const gchar *ret = NULL; + + if (msg->body_buf.len > 0) { + ret = msg->body_buf.begin; + } + + if (blen) { + *blen = msg->body_buf.len; + } + + return ret; +} + +gboolean +rspamd_http_message_set_body (struct rspamd_http_message *msg, + const gchar *data, gsize len) +{ + union _rspamd_storage_u *storage; + storage = &msg->body_buf.c; + + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + storage->shared.shm_name = g_strdup ("/rhm.XXXXXXXXXXXXXXXXXXXX"); + storage->shared.shm_fd = rspamd_shmem_mkstemp (storage->shared.shm_name); + + if (storage->shared.shm_fd == -1) { + return FALSE; + } + + if (len != 0 && len != ULLONG_MAX) { + if (ftruncate (storage->shared.shm_fd, len) == -1) { + return FALSE; + } + + msg->body_buf.str = mmap (NULL, len, + PROT_WRITE|PROT_READ, MAP_SHARED, + storage->shared.shm_fd, 0); + + if (msg->body_buf.str == MAP_FAILED) { + return FALSE; + } + + msg->body_buf.begin = msg->body_buf.str; + + if (data != NULL) { + memcpy (msg->body_buf.str, data, len); + msg->body_buf.len = len; + } + } + else { + msg->body_buf.len = 0; + msg->body_buf.begin = NULL; + msg->body_buf.str = NULL; + } + } + else { + if (len != 0 && len != ULLONG_MAX) { + if (data == NULL) { + storage->normal = rspamd_fstring_sized_new (len); + msg->body_buf.len = 0; + } + else { + storage->normal = rspamd_fstring_new_init (data, len); + msg->body_buf.len = len; + } + } + else { + storage->normal = rspamd_fstring_new (); + } + + msg->body_buf.begin = storage->normal->str; + msg->body_buf.str = storage->normal->str; + } + + return TRUE; +} + +gboolean +rspamd_http_message_set_body_from_fd (struct rspamd_http_message *msg, + gint fd) +{ + union _rspamd_storage_u *storage; + struct stat st; + + storage = &msg->body_buf.c; + msg->flags |= RSPAMD_HTTP_FLAG_SHMEM|RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE; + + storage->shared.shm_fd = dup (fd); + msg->body_buf.str = MAP_FAILED; + + if (storage->shared.shm_fd == -1) { + return FALSE; + } + + if (fstat (storage->shared.shm_fd, &st) == -1) { + return FALSE; + } + + msg->body_buf.str = mmap (NULL, st.st_size, + PROT_READ, MAP_SHARED, + storage->shared.shm_fd, 0); + + if (msg->body_buf.str == MAP_FAILED) { + return FALSE; + } + + msg->body_buf.begin = msg->body_buf.str; + msg->body_buf.len = st.st_size; + + return TRUE; +} + +gboolean +rspamd_http_message_append_body (struct rspamd_http_message *msg, + const gchar *data, gsize len) +{ + struct stat st; + union _rspamd_storage_u *storage; + gsize newlen; + + storage = &msg->body_buf.c; + + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + if (storage->shared.shm_fd == -1) { + return FALSE; + } + + if (fstat (storage->shared.shm_fd, &st) == -1) { + return FALSE; + } + + /* Unmap as we need another size of segment */ + if (msg->body_buf.str) { + munmap (msg->body_buf.str, msg->body_buf.len); + } + + /* Check if we need to grow */ + if (st.st_size - msg->body_buf.len < len) { + /* Need to grow */ + newlen = rspamd_fstring_suggest_size (msg->body_buf.len, st.st_size, + len); + + if (ftruncate (storage->shared.shm_fd, newlen) == -1) { + return FALSE; + } + } + + msg->body_buf.str = mmap (NULL, msg->body_buf.len + len, + PROT_WRITE|PROT_READ, MAP_SHARED, storage->shared.shm_fd, 0); + + if (msg->body_buf.str == MAP_FAILED) { + return FALSE; + } + + memcpy (msg->body_buf.str + msg->body_buf.len, data, len); + msg->body_buf.len += len; + msg->body_buf.begin = msg->body_buf.str; + } + else { + storage->normal = rspamd_fstring_append (storage->normal, data, len); + + /* Append might cause realloc */ + msg->body_buf.begin = storage->normal->str; + msg->body_buf.len = storage->normal->len; + msg->body_buf.str = storage->normal->str; + } + + return TRUE; +} + + void rspamd_http_message_free (struct rspamd_http_message *msg) { struct rspamd_http_header *hdr, *tmp_hdr; + union _rspamd_storage_u *storage; + struct stat st; LL_FOREACH_SAFE (msg->headers, hdr, tmp_hdr) { @@ -1910,9 +2108,31 @@ rspamd_http_message_free (struct rspamd_http_message *msg) g_slice_free1 (sizeof (*hdr->value), hdr->value); g_slice_free1 (sizeof (struct rspamd_http_header), hdr); } - if (msg->body != NULL) { - rspamd_fstring_free (msg->body); + + if (msg->body_buf.len != 0) { + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + storage = &msg->body_buf.c; + + if (storage->shared.shm_fd != -1) { + g_assert (fstat (storage->shared.shm_fd, &st) != -1); + + if (msg->body_buf.str != MAP_FAILED) { + munmap (msg->body_buf.str, st.st_size); + } + + close (storage->shared.shm_fd); + } + + if (storage->shared.shm_name != NULL) { + shm_unlink (storage->shared.shm_name); + g_free (storage->shared.shm_name); + } + } + else { + rspamd_fstring_free (msg->body_buf.c.normal); + } } + if (msg->url != NULL) { rspamd_fstring_free (msg->url); } @@ -2046,7 +2266,7 @@ rspamd_http_router_error_handler (struct rspamd_http_connection *conn, msg = rspamd_http_new_message (HTTP_RESPONSE); msg->date = time (NULL); msg->code = err->code; - msg->body = rspamd_fstring_new_init (err->message, strlen (err->message)); + rspamd_http_message_set_body (msg, err->message, strlen (err->message)); rspamd_http_connection_reset (entry->conn); rspamd_http_connection_write_message (entry->conn, msg, @@ -2160,14 +2380,7 @@ rspamd_http_router_try_file (struct rspamd_http_connection_entry *entry, reply_msg->date = time (NULL); reply_msg->code = 200; - reply_msg->body = rspamd_fstring_sized_new (st.st_size); - reply_msg->body->len = st.st_size; - reply_msg->body_buf.len = st.st_size; - reply_msg->body_buf.begin = reply_msg->body->str; - - if (read (fd, reply_msg->body->str, st.st_size) != st.st_size) { - close (fd); - rspamd_http_message_free (reply_msg); + if (!rspamd_http_message_set_body_from_fd (reply_msg, fd)) { return FALSE; } @@ -2239,7 +2452,7 @@ rspamd_http_router_finish_handler (struct rspamd_http_connection *conn, err_msg = rspamd_http_new_message (HTTP_RESPONSE); err_msg->date = time (NULL); err_msg->code = err->code; - err_msg->body = rspamd_fstring_new_init (err->message, + rspamd_http_message_set_body (err_msg, err->message, strlen (err->message)); rspamd_http_connection_reset (entry->conn); rspamd_http_connection_write_message (entry->conn, diff --git a/src/libutil/http.h b/src/libutil/http.h index d9fb73b82..1d4e0f886 100644 --- a/src/libutil/http.h +++ b/src/libutil/http.h @@ -47,7 +47,15 @@ struct rspamd_http_header { /** * Legacy spamc protocol */ -#define RSPAMD_HTTP_FLAG_SPAMC 1 << 1 +#define RSPAMD_HTTP_FLAG_SPAMC (1 << 0) +/** + * Store body of the message in a shared memory segment + */ +#define RSPAMD_HTTP_FLAG_SHMEM (1 << 2) +/** + * Store body of the message in an immutable shared memory segment + */ +#define RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE (1 << 3) /** * HTTP message structure, used for requests and replies @@ -57,8 +65,25 @@ struct rspamd_http_message { rspamd_fstring_t *host; rspamd_fstring_t *status; struct rspamd_http_header *headers; - rspamd_fstring_t *body; - rspamd_ftok_t body_buf; + + struct _rspamd_body_buf_s { + /* Data start */ + const gchar *begin; + /* Data len */ + gsize len; + /* Data buffer (used to write data inside) */ + gchar *str; + + /* Internal storage */ + union _rspamd_storage_u { + rspamd_fstring_t *normal; + struct { + gchar *shm_name; + gint shm_fd; + } shared; + } c; + } body_buf; + struct rspamd_cryptobox_pubkey *peer_key; time_t date; time_t last_modified; @@ -276,6 +301,44 @@ struct rspamd_http_message * rspamd_http_new_message (enum http_parser_type type */ struct rspamd_http_message* rspamd_http_message_from_url (const gchar *url); +/** + * Returns body for a message + * @param msg + * @param blen pointer where to save body length + * @return pointer to body start + */ +const gchar *rspamd_http_message_get_body (struct rspamd_http_message *msg, + gsize *blen); + +/** + * Set message's body from the string + * @param msg + * @param data + * @param len + * @return TRUE if a message's body has been set + */ +gboolean rspamd_http_message_set_body (struct rspamd_http_message *msg, + const gchar *data, gsize len); + +/** + * Maps fd as message's body + * @param msg + * @param fd + * @return TRUE if a message's body has been set + */ +gboolean rspamd_http_message_set_body_from_fd (struct rspamd_http_message *msg, + gint fd); + +/** + * Appends data to message's body + * @param msg + * @param data + * @param len + * @return TRUE if a message's body has been set + */ +gboolean rspamd_http_message_append_body (struct rspamd_http_message *msg, + const gchar *data, gsize len); + /** * Append a header to reply * @param rep -- 2.39.5